https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91517
--- Comment #4 from Peter Boyle <paboyle at ph dot ed.ac.uk> ---
Hi Jakob,
thanks for looking at this.
I'm trying to cut down a failure in a 100k-line package to the minimal example
that I can submit.
www.github.com/paboyle/Grid
is the original package.
With -fopenmp the following larger example still fails:
// Two-level expansion so that macro arguments are expanded before being
// stringized and handed to the _Pragma operator.
#define DO_PRAGMA_(x) _Pragma (#x)
#define DO_PRAGMA(x) DO_PRAGMA_(x)
// Thin wrappers over the OpenMP runtime queries; the argument is ignored.
#define thread_num(a) omp_get_thread_num()
#define thread_max(a) omp_get_max_threads()
// Plain (pragma-free) loops: the variadic arguments form the loop body.
#define naked_for(i,num,...) for ( uint64_t i=0;i<num;i++) { __VA_ARGS__ } ;
#define naked_foreach(i,container,...) for ( uint64_t i=container.begin();i<container.end();i++) { __VA_ARGS__ } ;
// Loops preceded by an OpenMP pragma. Each definition must stay on a single
// logical line: a #define ends at the first newline that is not escaped.
#define thread_for( i, num, ... ) DO_PRAGMA(omp parallel for schedule(static)) naked_for(i,num,{__VA_ARGS__});
#define thread_foreach( i, num, ... ) DO_PRAGMA(omp parallel for schedule(static)) naked_foreach(i,num,{__VA_ARGS__});
#define thread_for_in_region( i, num, ... ) DO_PRAGMA(omp for schedule(static)) naked_for(i,num,{__VA_ARGS__});
#define thread_for_collapse2( i, num, ... ) DO_PRAGMA(omp parallel for collapse(2)) naked_for(i,num,{__VA_ARGS__});
#define thread_for_collapse( N , i, num, ... ) DO_PRAGMA(omp parallel for collapse ( N ) ) naked_for(i,num,{__VA_ARGS__});
#define thread_for_collapse_in_region( N , i, num, ... ) DO_PRAGMA(omp for collapse ( N )) naked_for(i,num,{__VA_ARGS__});
// Bare pragmas, intended to be followed by a statement or block.
#define thread_region DO_PRAGMA(omp parallel)
#define thread_critical DO_PRAGMA(omp critical)
// Accumulates into coarseData the inner products of fineData with each vector
// in Basis. For every fine site index sf in [0, fine->oSites()) the matching
// coarse site index sc is derived by dividing the fine coordinate by the
// per-dimension block size, and innerProduct(Basis[i][sf], fineData[sf]) is
// added to component i of coarseData[sc].
//
// coarseData : output; zeroed first, then accumulated into.
// fineData   : input field on the fine grid.
// Basis      : nbasis fields (asserted below).
//
// NOTE: this function is the reproducer for the report: thread_critical is used
// inside the macro argument of thread_for, which is the construct whose pragma
// shows up in the wrong place in the -E output quoted below.
template<class vobj,class CComplex,int nbasis>
inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
const Lattice<vobj> &fineData,
const std::vector<Lattice<vobj> > &Basis)
{
GridBase * fine = fineData.Grid();
GridBase * coarse= coarseData.Grid();
int _ndimension = coarse->_ndimension;
// Sanity checks: one basis field per coarse component; subdivides() and
// conformable() are defined elsewhere (presumably grid-compatibility checks).
assert( nbasis == Basis.size() );
subdivides(coarse,fine);
for(int i=0;i<nbasis;i++){
conformable(Basis[i],fineData);
}
// block_r[d] = fine extent / coarse extent in dimension d; must divide exactly.
Coordinate block_r (_ndimension);
for(int d=0 ; d<_ndimension;d++){
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]);
}
// Clear the accumulator before summing contributions.
coarseData=Zero();
auto fineData_ = fineData.View();
auto coarseData_ = coarseData.View();
// Loop over fine sites in parallel; each fine site is mapped to the coarse
// site it contributes to.
thread_for( sf, fine->oSites(), {
int sc;
Coordinate coor_c(_ndimension);
Coordinate coor_f(_ndimension);
// fine index -> fine coordinate -> coarse coordinate -> coarse index
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
// Several fine sites can map to the same sc (integer division above), so
// the read-modify-write of coarseData_[sc] is placed under omp critical.
thread_critical {
for(int i=0;i<nbasis;i++) {
auto Basis_ = Basis[i].View();
coarseData_[sc](i)=coarseData_[sc](i) +
innerProduct(Basis_[sf],fineData_[sf]);
}
}
});
return;
}
Producing critical in the wrong place:
Peters-Laptop:build peterboyle$ g++-mp-9 -fopenmp -E tmp.cc
# 1 "tmp.cc"
# 1 "<built-in>"
# 1 "<command-line>"
# 1 "tmp.cc"
# 19 "tmp.cc"
template<class vobj,class CComplex,int nbasis>
inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
const Lattice<vobj> &fineData,
const std::vector<Lattice<vobj> > &Basis)
{
GridBase * fine = fineData.Grid();
GridBase * coarse= coarseData.Grid();
int _ndimension = coarse->_ndimension;
assert( nbasis == Basis.size() );
subdivides(coarse,fine);
for(int i=0;i<nbasis;i++){
conformable(Basis[i],fineData);
}
Coordinate block_r (_ndimension);
for(int d=0 ; d<_ndimension;d++){
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]);
}
coarseData=Zero();
auto fineData_ = fineData.View();
auto coarseData_ = coarseData.View();
# 61 "tmp.cc"
# 61 "tmp.cc"
#pragma omp parallel for schedule(static)
# 47 "tmp.cc"
# 61 "tmp.cc"
# 61 "tmp.cc"
#pragma omp critical
# 55 "tmp.cc"
# 47 "tmp.cc"
for ( uint64_t sf=0;sf<fine->oSites();sf++) { {{ int sc; Coordinate
coor_c(_ndimension); Coordinate coor_f(_ndimension);
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); for(int
d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); { for(int
i=0;i<nbasis;i++) { auto Basis_ = Basis[i].View();
coarseData_[sc](i)=coarseData_[sc](i) + innerProduct(Basis_[sf],fineData_[sf]);
} } }} } ;;
# 61 "tmp.cc"
;
return;
}