sc/qa/unit/data/ods/opencl/compiler/horizontal.ods |binary
sc/qa/unit/opencl-test.cxx                         | 32 +++++++
sc/source/core/opencl/formulagroupcl.cxx           | 88 ++++++++++-----------
sc/source/core/opencl/opbase.cxx                   | 12 ++
sc/source/core/opencl/opbase.hxx                   | 7 -
5 files changed, 90 insertions(+), 49 deletions(-)
New commits: commit 912d23636a55473221e3e35768fb9ac42c3e9b76 Author: I-Jui (Ray) Sung <r...@multicorewareinc.com> Date: Mon Nov 18 15:05:25 2013 -0600 GPU Calc: turn on parallel sumifs and parallel sum reduce Change-Id: Id615ea0f5f16a4dfc517aacb30715c2df84553e3 diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx index 3a63c92..9d1e2a9 100644 --- a/sc/source/core/opencl/formulagroupcl.cxx +++ b/sc/source/core/opencl/formulagroupcl.cxx @@ -426,9 +426,10 @@ public: } virtual bool NeedParallelReduction(void) const { - if (dynamic_cast<OpSum*>(mpCodeGen.get()) - && !dynamic_cast<OpAverage*>(mpCodeGen.get())) - return GetWindowSize()> 100 && + if ((dynamic_cast<OpSum*>(mpCodeGen.get()) + && !dynamic_cast<OpAverage*>(mpCodeGen.get())) || + dynamic_cast<OpSumIfs*>(mpCodeGen.get())) + return GetWindowSize()> 4 && ( (GetStartFixed() && GetEndFixed()) || (!GetStartFixed() && !GetEndFixed()) ) ; else @@ -457,10 +458,10 @@ public: ss << " tmp = 0.0;\n"; ss << " int loopOffset = l*512;\n"; ss << " if((loopOffset + lidx + offset + 256) < min( offset + windowSize, arrayLength))\n"; - ss << " tmp = A[loopOffset + lidx + offset] + " - "A[loopOffset + lidx + offset + 256];\n"; + ss << " tmp = fsum(A[loopOffset + lidx + offset], 0) + " + "fsum(A[loopOffset + lidx + offset + 256], 0);\n"; ss << " else if ((loopOffset + lidx + offset) < min(offset + windowSize, arrayLength))\n"; - ss << " tmp = A[loopOffset + lidx + offset];\n"; + ss << " tmp = fsum(A[loopOffset + lidx + offset], 0);\n"; ss << " shm_buf[lidx] = tmp;\n"; ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; ss << " for (int i = 128; i >0; i/=2) {\n"; @@ -496,7 +497,8 @@ public: size_t nCurWindowSize = mpDVR->GetRefRowSize(); if (dynamic_cast<OpSum*>(mpCodeGen.get())) { - if (!bIsStartFixed && !bIsEndFixed) + if ((!bIsStartFixed && !bIsEndFixed) || + (bIsStartFixed && bIsEndFixed)) { // set 100 as a temporary threshold for invoking reduction // kernel in NeedParalleLReduction function @@ 
-510,21 +512,6 @@ public: return nCurWindowSize; } } - - if (bIsStartFixed && bIsEndFixed) - { - // set 100 as a temporary threshold for invoking reduction - // kernel in NeedParalleLReduction function - if (NeedParallelReduction()) - { - std::string temp = Base::GetName() + "[0]"; - ss << "tmp = "; - ss << mpCodeGen->Gen2(temp, "tmp"); - ss << ";\n\t"; - needBody = false; - return nCurWindowSize; - } - } } needBody = true; @@ -576,7 +563,8 @@ public: virtual size_t Marshal(cl_kernel k, int argno, int w, cl_program mpProgram) { - if (!NeedParallelReduction()) + if (!NeedParallelReduction() || + dynamic_cast<OpSumIfs*>(mpCodeGen.get())) return Base::Marshal(k, argno, w, mpProgram); assert(Base::mpClmem == NULL); commit af223ecdf01b76bc1005c8fcc342165639a8823f Author: I-Jui (Ray) Sung <r...@multicorewareinc.com> Date: Mon Nov 18 13:39:02 2013 -0600 GPU Calc: testcases for horizontal ranges AMLOEXT-242 BUG Change-Id: I4b87bdf6183ed81ad767550f5cd49aab51531cf2 diff --git a/sc/qa/unit/data/ods/opencl/compiler/horizontal.ods b/sc/qa/unit/data/ods/opencl/compiler/horizontal.ods new file mode 100644 index 0000000..18edf64 Binary files /dev/null and b/sc/qa/unit/data/ods/opencl/compiler/horizontal.ods differ diff --git a/sc/qa/unit/opencl-test.cxx b/sc/qa/unit/opencl-test.cxx index 3f90040..3f02d9b 100644 --- a/sc/qa/unit/opencl-test.cxx +++ b/sc/qa/unit/opencl-test.cxx @@ -81,6 +81,7 @@ public: void testFinacialRateFormula(); void testFinancialAccrintmFormula(); void testFinancialAccrintFormula(); + void testCompilerHorizontal(); void testCompilerNested(); void testFinacialSLNFormula(); void testStatisticalFormulaGammaLn(); @@ -251,6 +252,7 @@ public: CPPUNIT_TEST(testFinacialIRRFormula); CPPUNIT_TEST(testFinacialMIRRFormula); CPPUNIT_TEST(testFinacialRateFormula); + CPPUNIT_TEST(testCompilerHorizontal); CPPUNIT_TEST(testCompilerNested); CPPUNIT_TEST(testFinacialSLNFormula); CPPUNIT_TEST(testFinancialAccrintmFormula); @@ -443,6 +445,36 @@ void ScOpenclTest::enableOpenCL() 
sc::FormulaGroupInterpreter::enableOpenCL(true); } +void ScOpenclTest::testCompilerHorizontal() +{ + if (!detectOpenCLDevice()) + return; + + ScDocShellRef xDocSh = loadDoc("opencl/compiler/horizontal.", ODS); + ScDocument* pDoc = xDocSh->GetDocument(); + CPPUNIT_ASSERT(pDoc); + enableOpenCL(); + pDoc->CalcAll(); + + ScDocShellRef xDocShRes = loadDoc("opencl/compiler/horizontal.", ODS); + ScDocument* pDocRes = xDocShRes->GetDocument(); + CPPUNIT_ASSERT(pDocRes); + // Check the results of formula cells in the shared formula range. + for (SCROW i = 1; i < 5; ++i) + { + double fLibre = pDoc->GetValue(ScAddress(12, i, 0)); + double fExcel = pDocRes->GetValue(ScAddress(12, i, 0)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(fExcel, fLibre, fabs(0.0001*fExcel)); + fLibre = pDoc->GetValue(ScAddress(13, i, 0)); + fExcel = pDocRes->GetValue(ScAddress(13, i, 0)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(fExcel, fLibre, fabs(0.0001*fExcel)); + fLibre = pDoc->GetValue(ScAddress(14, i, 0)); + fExcel = pDocRes->GetValue(ScAddress(14, i, 0)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(fExcel, fLibre, fabs(0.0001*fExcel)); + } + xDocSh->DoClose(); + xDocShRes->DoClose(); +} void ScOpenclTest::testCompilerNested() { if (!detectOpenCLDevice()) commit c935a91260aaba79300cf516bac044ebb8dc7ef9 Author: I-Jui (Ray) Sung <r...@multicorewareinc.com> Date: Mon Nov 18 14:46:30 2013 -0600 GPU Calc: implement horizontal ranges as multiple VectorRefs AMLOEXT-242 Fix Change-Id: Ia3deb221528230554b7c431e926b10428441666a diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx index a863c94..3a63c92 100644 --- a/sc/source/core/opencl/formulagroupcl.cxx +++ b/sc/source/core/opencl/formulagroupcl.cxx @@ -65,9 +65,10 @@ size_t VectorRef::Marshal(cl_kernel k, int argno, int, cl_program) const formula::DoubleVectorRefToken* pDVR = dynamic_cast< const formula::DoubleVectorRefToken* >(ref); assert(pDVR); - if (pDVR->GetArrays()[0].mpNumericArray == NULL) + if 
(pDVR->GetArrays()[mnIndex].mpNumericArray == NULL) throw Unhandled(); - pHostBuffer = const_cast<double*>(pDVR->GetArrays()[0].mpNumericArray); + pHostBuffer = const_cast<double*>( + pDVR->GetArrays()[mnIndex].mpNumericArray); szHostBuffer = pDVR->GetArrayLength() * sizeof(double); } else { throw Unhandled(); @@ -281,8 +282,8 @@ class DynamicKernelStringArgument: public VectorRef { public: DynamicKernelStringArgument(const std::string &s, - FormulaTreeNodeRef ft): - VectorRef(s, ft) {} + FormulaTreeNodeRef ft, int index = 0): + VectorRef(s, ft, index) {} virtual void GenSlidingWindowFunction(std::stringstream &) {} /// Generate declaration @@ -319,7 +320,7 @@ size_t DynamicKernelStringArgument::Marshal(cl_kernel k, int argno, int, cl_prog dynamic_cast< const formula::DoubleVectorRefToken* >(ref); assert(pDVR); nStrings = pDVR->GetArrayLength(); - vRef = pDVR->GetArrays()[0]; + vRef = pDVR->GetArrays()[mnIndex]; } size_t szHostBuffer = nStrings * sizeof(cl_int); // Marshal strings. Right now we pass hashes of these string @@ -411,8 +412,9 @@ class DynamicKernelSlidingArgument: public Base { public: DynamicKernelSlidingArgument(const std::string &s, - FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase> &CodeGen): - Base(s, ft), mpCodeGen(CodeGen), mpClmem2(NULL) + FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase> &CodeGen, + int index=0): + Base(s, ft, index), mpCodeGen(CodeGen), mpClmem2(NULL) { FormulaToken *t = ft->GetFormulaToken(); if (t->GetType() != formula::svDoubleVectorRef) @@ -491,9 +493,6 @@ public: std::stringstream &ss, bool &needBody) { assert(mpDVR); - // Do not handle horizontal double vectors yet - if (mpDVR->GetArrays().size() > 1) - throw Unhandled(); size_t nCurWindowSize = mpDVR->GetRefRowSize(); if (dynamic_cast<OpSum*>(mpCodeGen.get())) { @@ -528,6 +527,15 @@ public: } } needBody = true; + + // No need to generate a for-loop for degenerated cases + if (nCurWindowSize == 1) + { + ss << "if (gid0 <" << 
mpDVR->GetArrayLength(); + ss << ")\n\t{\tint i = 0;\n\t\t"; + return nCurWindowSize; + } + ss << "for (int i = "; if (!bIsStartFixed && bIsEndFixed) { @@ -579,10 +587,10 @@ public: size_t nInput = mpDVR->GetArrayLength(); size_t nCurWindowSize = mpDVR->GetRefRowSize(); // create clmem buffer - if (mpDVR->GetArrays()[0].mpNumericArray == NULL) + if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == NULL) throw Unhandled(); double *pHostBuffer = const_cast<double*>( - mpDVR->GetArrays()[0].mpNumericArray); + mpDVR->GetArrays()[Base::mnIndex].mpNumericArray); size_t szHostBuffer = nInput * sizeof(double); Base::mpClmem = clCreateBuffer(kEnv.mpkContext, (cl_mem_flags) CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR, @@ -1269,15 +1277,19 @@ DynamicKernelSoPArguments::DynamicKernelSoPArguments( const formula::DoubleVectorRefToken* pDVR = dynamic_cast< const formula::DoubleVectorRefToken* >(pChild); assert(pDVR); - if (pDVR->GetArrays()[0].mpNumericArray) - mvSubArguments.push_back( - SubArgument(new DynamicKernelSlidingArgument - <VectorRef>(ts, ft->Children[i], mpCodeGen))); - else - mvSubArguments.push_back( - SubArgument(new DynamicKernelSlidingArgument - <DynamicKernelStringArgument>( - ts, ft->Children[i], mpCodeGen))); + for (size_t j = 0; j < pDVR->GetArrays().size(); ++j) + { + if (pDVR->GetArrays()[j].mpNumericArray) + mvSubArguments.push_back( + SubArgument(new DynamicKernelSlidingArgument + <VectorRef>( + ts, ft->Children[i], mpCodeGen, j))); + else + mvSubArguments.push_back( + SubArgument(new DynamicKernelSlidingArgument + <DynamicKernelStringArgument>( + ts, ft->Children[i], mpCodeGen, j))); + } } else if (pChild->GetType() == formula::svSingleVectorRef) { const formula::SingleVectorRefToken* pSVR = dynamic_cast< const formula::SingleVectorRefToken* >(pChild); diff --git a/sc/source/core/opencl/opbase.cxx b/sc/source/core/opencl/opbase.cxx index a6beffb..95657df 100644 --- a/sc/source/core/opencl/opbase.cxx +++ b/sc/source/core/opencl/opbase.cxx @@ -30,8 +30,16 @@ 
FormulaToken* DynamicKernelArgument::GetFormulaToken(void) const return mFormulaTree->GetFormulaToken(); } -VectorRef::VectorRef(const std::string &s, FormulaTreeNodeRef ft): - DynamicKernelArgument(s, ft), mpClmem(NULL) {} +VectorRef::VectorRef(const std::string &s, FormulaTreeNodeRef ft, int idx): + DynamicKernelArgument(s, ft), mpClmem(NULL), mnIndex(idx) +{ + if (mnIndex) + { + std::stringstream ss; + ss << mSymName << "s" << mnIndex; + mSymName = ss.str(); + } +} VectorRef::~VectorRef() { diff --git a/sc/source/core/opencl/opbase.hxx b/sc/source/core/opencl/opbase.hxx index 11b66df..d921119 100644 --- a/sc/source/core/opencl/opbase.hxx +++ b/sc/source/core/opencl/opbase.hxx @@ -103,7 +103,7 @@ public: virtual bool NeedParallelReduction(void) const { return false; } protected: - const std::string mSymName; + std::string mSymName; FormulaTreeNodeRef mFormulaTree; }; @@ -115,12 +115,11 @@ protected: class VectorRef : public DynamicKernelArgument { public: - VectorRef(const std::string &s, FormulaTreeNodeRef ft); + VectorRef(const std::string &s, FormulaTreeNodeRef ft, int index = 0); const std::string &GetNameAsString(void) const { return mSymName; } /// Generate declaration virtual void GenDecl(std::stringstream &ss) const; - /// When declared as input to a sliding window function virtual void GenSlidingWindowDecl(std::stringstream &ss) const; @@ -146,6 +145,8 @@ public: protected: // Used by marshaling cl_mem mpClmem; + // index in multiple double vector refs that have multiple ranges + const int mnIndex; }; /// Abstract class for code generation _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits