sc/source/core/opencl/formulagroupcl.cxx | 290 +++++++++++++++++++------------ 1 file changed, 186 insertions(+), 104 deletions(-)
New commits: commit 2c39e778873f10037721d844697962dc41e3bcc3 Author: I-Jui (Ray) Sung <r...@multicorewareinc.com> Date: Tue Nov 19 16:42:56 2013 -0600 GPU Calc: separate out parallel reduction from DynamicKernelSlidingArgument Create a new class ParallelReductionVectorRef to straighten out code generation and marshaling logic between sequential and parallel code generation alternatives. Change-Id: Id029ad441f80712f8e7396dcd985e3363ce08ff8 diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx index abd3230..388605c 100644 --- a/sc/source/core/opencl/formulagroupcl.cxx +++ b/sc/source/core/opencl/formulagroupcl.cxx @@ -409,6 +409,7 @@ protected: /// Handling a Double Vector that is used as a sliding window input /// to either a sliding window average or sum-of-products +/// Generate a sequential loop for reductions class OpSum; // Forward Declaration class OpAverage; // Forward Declaration class OpMin; // Forward Declaration @@ -430,79 +431,8 @@ public: bIsStartFixed = mpDVR->IsStartFixed(); bIsEndFixed = mpDVR->IsEndFixed(); } - virtual bool NeedParallelReduction(void) const - { - if ((dynamic_cast<OpSum*>(mpCodeGen.get()) - && !dynamic_cast<OpAverage*>(mpCodeGen.get())) || - dynamic_cast<OpMin*>(mpCodeGen.get()) || - dynamic_cast<OpMax*>(mpCodeGen.get()) || - dynamic_cast<OpSumIfs*>(mpCodeGen.get())) - return GetWindowSize()> 100 && - ( (GetStartFixed() && GetEndFixed()) || - (!GetStartFixed() && !GetEndFixed()) ) ; - else - return false; - } - virtual void GenSlidingWindowFunction(std::stringstream &ss) { - if (!dynamic_cast<OpSumIfs*>(mpCodeGen.get()) - && NeedParallelReduction()) - { - std::string name = Base::GetName(); - ss << "__kernel void "<<name; - ss << "_reduction(__global double* A, " - "__global double *result,int arrayLength,int windowSize){\n"; - ss << " double tmp, current_result =" << - mpCodeGen->GetBottom(); - ss << ";\n"; - ss << " int writePos = get_group_id(1);\n"; - ss << " int lidx = get_local_id(0);\n"; - ss << " __local double shm_buf[256];\n"; - if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) - ss << " int offset = 0;\n"; - else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) - ss << " int offset = get_group_id(1);\n"; - else - throw Unhandled(); - ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; - ss << " int loop = arrayLength/512 + 1;\n"; - ss << " for (int l=0; l<loop; l++){\n"; - ss << " tmp = "<< mpCodeGen->GetBottom() << ";\n"; - ss << " int loopOffset = l*512;\n"; - ss << " if((loopOffset + lidx + offset + 256) < min( offset + windowSize, arrayLength))\n"; - ss << " tmp = "; - ss << mpCodeGen->Gen2( - std::string( - "legalize(A[loopOffset + lidx + offset], ")+ - mpCodeGen->GetBottom() +")", - std::string( - "legalize(A[loopOffset + lidx + offset + 256], ")+ - mpCodeGen->GetBottom() +")" - ); - ss << ";"; - ss << " else if ((loopOffset + lidx + offset) < min(offset + windowSize, arrayLength))\n"; - ss << " tmp = legalize(A[loopOffset + lidx + offset],"; - ss << mpCodeGen->GetBottom() << ");\n"; - ss << " shm_buf[lidx] = tmp;\n"; - ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; - ss << " for (int i = 128; i >0; i/=2) {\n"; - ss << " if (lidx < i)\n"; - ss << " shm_buf[lidx] = "; - ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]"); - ss << ";"; - ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; - ss << " }\n"; - ss << " if (lidx == 0)\n"; - ss << " current_result ="; - ss << mpCodeGen->Gen2("current_result", "shm_buf[0]"); - ss << ";\n"; - ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; - ss << " }\n"; - ss << " if (lidx == 0)\n"; - ss << " result[writePos] = current_result;\n"; - ss << "}\n"; - } - } + virtual void GenSlidingWindowFunction(std::stringstream &) {} virtual std::string GenSlidingWindowDeclRef(bool=false) const { @@ -519,26 +449,7 @@ public: { assert(mpDVR); size_t nCurWindowSize = mpDVR->GetRefRowSize(); - if (!dynamic_cast<OpSumIfs*>(mpCodeGen.get()) - && NeedParallelReduction()) - { - if ((!bIsStartFixed && !bIsEndFixed) || - (bIsStartFixed && bIsEndFixed)) - { - // set 100 as a temporary threshold for invoking reduction - // kernel in NeedParalleLReduction function - if (NeedParallelReduction()) - { - std::string temp = Base::GetName() + "[gid0]"; - ss << "tmp = "; - ss << mpCodeGen->Gen2(temp, "tmp"); - ss << ";\n\t"; - needBody = false; - return nCurWindowSize; - } - } - } -// original for loop + // original for loop #ifndef UNROLLING needBody = true; // No need to generate a for-loop for degenerated cases @@ -586,8 +497,6 @@ public: return nCurWindowSize; #endif - - #ifdef UNROLLING { if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) { @@ -709,13 +618,134 @@ return nCurWindowSize; } #endif } + ~DynamicKernelSlidingArgument() + { + if (mpClmem2) + { + clReleaseMemObject(mpClmem2); + mpClmem2 = NULL; + } + } - virtual size_t Marshal(cl_kernel k, int argno, int w, cl_program mpProgram) + size_t GetArrayLength(void) const {return mpDVR->GetArrayLength(); } + + size_t GetWindowSize(void) const {return mpDVR->GetRefRowSize(); } + + size_t GetStartFixed(void) const {return bIsStartFixed; } + + size_t GetEndFixed(void) const {return bIsEndFixed; } + +protected: + bool bIsStartFixed, bIsEndFixed; + const formula::DoubleVectorRefToken *mpDVR; + // from parent nodes + boost::shared_ptr<SlidingFunctionBase> mpCodeGen; + // controls whether to invoke the reduction kernel during marshaling or not + cl_mem mpClmem2; +}; + +/// Handling a Double Vector that is used as a sliding window input +/// Performs parallel reduction based on given operator +template<class Base> +class ParallelReductionVectorRef: public Base +{ +public: + ParallelReductionVectorRef(const std::string &s, + FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase> &CodeGen, + int index=0): + Base(s, ft, index), mpCodeGen(CodeGen), mpClmem2(NULL) + { + FormulaToken *t = ft->GetFormulaToken(); + if (t->GetType() != formula::svDoubleVectorRef) + throw Unhandled(); + mpDVR = dynamic_cast<const formula::DoubleVectorRefToken *>(t); + assert(mpDVR); + bIsStartFixed = mpDVR->IsStartFixed(); + bIsEndFixed = mpDVR->IsEndFixed(); + } + /// Emit the definition for the auxiliary reduction kernel + virtual void GenSlidingWindowFunction(std::stringstream &ss) { + std::string name = Base::GetName(); + ss << "__kernel void "<<name; + ss << "_reduction(__global double* A, " + "__global double *result,int arrayLength,int windowSize){\n"; + ss << " double tmp, current_result =" << + mpCodeGen->GetBottom(); + ss << ";\n"; + ss << " int writePos = get_group_id(1);\n"; + ss << " int lidx = get_local_id(0);\n"; + ss << " __local double shm_buf[256];\n"; + if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) + ss << " int offset = 0;\n"; + else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) + ss << " int offset = get_group_id(1);\n"; + else + throw Unhandled(); + ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; + ss << " int loop = arrayLength/512 + 1;\n"; + ss << " for (int l=0; l<loop; l++){\n"; + ss << " tmp = "<< mpCodeGen->GetBottom() << ";\n"; + ss << " int loopOffset = l*512;\n"; + ss << " if((loopOffset + lidx + offset + 256) < min( offset + windowSize, arrayLength))\n"; + ss << " tmp = "; + ss << mpCodeGen->Gen2( + std::string( + "legalize(A[loopOffset + lidx + offset], ")+ + mpCodeGen->GetBottom() +")", + std::string( + "legalize(A[loopOffset + lidx + offset + 256], ")+ + mpCodeGen->GetBottom() +")" + ); + ss << ";\n"; + ss << " else if ((loopOffset + lidx + offset) < min(offset + windowSize, arrayLength))\n"; + ss << " tmp = legalize(A[loopOffset + lidx + offset],"; + ss << mpCodeGen->GetBottom() << ");\n"; + ss << " shm_buf[lidx] = tmp;\n"; + ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; + ss << " for (int i = 128; i >0; i/=2) {\n"; + ss << " if (lidx < i)\n"; + ss << " shm_buf[lidx] = "; + ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]"); + ss << ";"; + ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; + ss << " }\n"; + ss << " if (lidx == 0)\n"; + ss << " current_result ="; + ss << mpCodeGen->Gen2("current_result", "shm_buf[0]"); + ss << ";\n"; + ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; + ss << " }\n"; + ss << " if (lidx == 0)\n"; + ss << " result[writePos] = current_result;\n"; + ss << "}\n"; + } + + + virtual std::string GenSlidingWindowDeclRef(bool=false) const + { + std::stringstream ss; + if (!bIsStartFixed && !bIsEndFixed) + ss << Base::GetName() << "[i + gid0]"; + else + ss << Base::GetName() << "[i]"; + return ss.str(); + } + /// Controls how the elements in the DoubleVectorRef are traversed + virtual size_t GenReductionLoopHeader( + std::stringstream &ss, bool &needBody) { - if (!NeedParallelReduction() || - dynamic_cast<OpSumIfs*>(mpCodeGen.get())) - return Base::Marshal(k, argno, w, mpProgram); + assert(mpDVR); + size_t nCurWindowSize = mpDVR->GetRefRowSize(); + std::string temp = Base::GetName() + "[gid0]"; + ss << "tmp = "; + ss << mpCodeGen->Gen2(temp, "tmp"); + ss << ";\n\t"; + needBody = false; + return nCurWindowSize; + } + virtual size_t Marshal(cl_kernel k, int argno, int w, cl_program mpProgram) + { assert(Base::mpClmem == NULL); // Obtain cl context KernelEnv kEnv; @@ -780,7 +810,7 @@ return nCurWindowSize; throw OpenCLError(err); return 1; } - ~DynamicKernelSlidingArgument() + ~ParallelReductionVectorRef() { if (mpClmem2) { @@ -806,6 +836,8 @@ protected: cl_mem mpClmem2; }; + + /// Abstract class for code generation class Reduction: public SlidingFunctionBase @@ -813,6 +845,7 @@ class Reduction: public SlidingFunctionBase public: typedef DynamicKernelSlidingArgument<VectorRef> NumericRange; typedef DynamicKernelSlidingArgument<DynamicKernelStringArgument> StringRange; + typedef ParallelReductionVectorRef<VectorRef> ParallelNumericRange; virtual void GenSlidingWindowFunction(std::stringstream &ss, const std::string sSymName, SubArguments &vSubArguments) @@ -834,13 +867,23 @@ public: size_t nItems = 0; while (i--) { - if (NumericRange *NR = dynamic_cast<NumericRange *> (vSubArguments[i].get())) + if (NumericRange *NR = + dynamic_cast<NumericRange *> (vSubArguments[i].get())) { bool needBody; nItems += NR->GenReductionLoopHeader(ss, needBody); if (needBody == false) continue; } - else if (StringRange *SR = dynamic_cast<StringRange *> (vSubArguments[i].get())) + else if (ParallelNumericRange *PNR = + dynamic_cast<ParallelNumericRange *> (vSubArguments[i].get())) + { + //did not handle yet + bool needBody; + nItems += PNR->GenReductionLoopHeader(ss, needBody); + if (needBody == false) continue; + } + else if (StringRange *SR = + dynamic_cast<StringRange *> (vSubArguments[i].get())) { //did not handle yet bool needBody; @@ -1497,6 +1540,46 @@ boost::shared_ptr<DynamicKernelArgument> SoPHelper( return boost::shared_ptr<DynamicKernelArgument>(new DynamicKernelSoPArguments(ts, ft, pCodeGen)); } +template<class Base> +DynamicKernelArgument *VectorRefFactory(const std::string &s, + const FormulaTreeNodeRef& ft, + boost::shared_ptr<SlidingFunctionBase> &pCodeGen, + int index) +{ + //Black lists ineligible classes here .. + // SUMIFS does not perform parallel reduction at DoubleVectorRef level + if (dynamic_cast<OpSumIfs*>(pCodeGen.get())) { + return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index); + } + // AVERAGE is not supported yet + else if (dynamic_cast<OpAverage*>(pCodeGen.get())) + { + return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index); + } + // COUNT is not supported yet + else if (dynamic_cast<OpCount*>(pCodeGen.get())) + { + return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index); + } + // Only child class of Reduction is supported + else if (!dynamic_cast<Reduction*>(pCodeGen.get())) + { + return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index); + } + + const formula::DoubleVectorRefToken* pDVR = + dynamic_cast< const formula::DoubleVectorRefToken* >( + ft->GetFormulaToken()); + // Window being too small to justify a parallel reduction + if (pDVR->GetRefRowSize() < 100) + return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index); + if ((pDVR->IsStartFixed() && pDVR->IsEndFixed()) || + (!pDVR->IsStartFixed() && !pDVR->IsEndFixed())) + return new ParallelReductionVectorRef<Base>(s, ft, pCodeGen, index); + else // Other cases are not supported as well + return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index); +} + DynamicKernelSoPArguments::DynamicKernelSoPArguments( const std::string &s, const FormulaTreeNodeRef& ft, SlidingFunctionBase* pCodeGen) : DynamicKernelArgument(s, ft), mpCodeGen(pCodeGen) @@ -1523,12 +1606,11 @@ DynamicKernelSoPArguments::DynamicKernelSoPArguments( { if (pDVR->GetArrays()[j].mpNumericArray) mvSubArguments.push_back( - SubArgument(new DynamicKernelSlidingArgument - <VectorRef>( + SubArgument(VectorRefFactory<VectorRef>( ts, ft->Children[i], mpCodeGen, j))); else mvSubArguments.push_back( - SubArgument(new DynamicKernelSlidingArgument + SubArgument(VectorRefFactory <DynamicKernelStringArgument>( ts, ft->Children[i], mpCodeGen, j))); } _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits