This is an automated email from the git hooks/post-receive script. sebastic-guest pushed a commit to branch upstream-master in repository pktools.
commit f1c59c74ae910460ee382eb9406f2a2aad0ba353 Author: Pieter Kempeneers <kempe...@gmail.com> Date: Fri Jan 18 17:28:27 2013 +0100 string map in pkclassify_nn.c and pkclassify_svm.cc --- src/algorithms/ConfusionMatrix.h | 1 + src/algorithms/myfann_cpp.h | 11 ++- src/apps/pkclassify_nn.cc | 96 +++++++++++++++++-------- src/apps/pkclassify_nn.h | 151 +++++++++++++++++++++++++++++++++++++++ src/apps/pkclassify_svm.cc | 138 ++++++++++++++++++++++------------- src/imageclasses/ImgReaderOgr.h | 136 +++++++++++++++++++++++++++++++++++ 6 files changed, 455 insertions(+), 78 deletions(-) diff --git a/src/algorithms/ConfusionMatrix.h b/src/algorithms/ConfusionMatrix.h index 33cf04f..b5481fb 100644 --- a/src/algorithms/ConfusionMatrix.h +++ b/src/algorithms/ConfusionMatrix.h @@ -33,6 +33,7 @@ public: ConfusionMatrix(const vector<string>& classNames); ConfusionMatrix(const ConfusionMatrix& cm); ConfusionMatrix& operator=(const ConfusionMatrix& cm); + short size() const {return m_results.size();}; void resize(short nclass); void setClassNames(const vector<string>& classNames); void pushBackClassName(const string& className); diff --git a/src/algorithms/myfann_cpp.h b/src/algorithms/myfann_cpp.h index 6aba87f..4901cfa 100644 --- a/src/algorithms/myfann_cpp.h +++ b/src/algorithms/myfann_cpp.h @@ -1535,6 +1535,10 @@ public: trainingFeatures[iclass].push_back(testFeatures[iclass].back()); testFeatures[iclass].pop_back(); } + if(verbose>1){ + std::cout << "training size " << iclass << ": " << trainingFeatures[iclass].size() << std::endl; + std::cout << "test size " << iclass << ": " << testFeatures[iclass].size() << std::endl; + } assert(trainingFeatures[iclass].size()); } //create test sample @@ -1552,8 +1556,13 @@ public: std::cout << "Error: testclass " << testclass << " has no training" << std::endl; assert(trainingFeatures[testclass].size()); ++nsample; - if(static_cast<float>(trainingFeatures[testclass].size())/static_cast<float>(testFeatures[testclass].size())<=cv) + 
if(static_cast<float>(trainingFeatures[testclass].size())/static_cast<float>(testFeatures[testclass].size())<=(cv-1)){ + if(verbose>1){ + std::cout << "training size " << testclass << ": " << trainingFeatures[testclass].size() << std::endl; + std::cout << "test size " << testclass << ": " << testFeatures[testclass].size() << std::endl; + } testclass=(testclass+1)%nclass; + } } assert(nsample==ntest); //training with left out training set diff --git a/src/apps/pkclassify_nn.cc b/src/apps/pkclassify_nn.cc index 1c56084..c9f678a 100644 --- a/src/apps/pkclassify_nn.cc +++ b/src/apps/pkclassify_nn.cc @@ -100,17 +100,17 @@ int main(int argc, char *argv[]) Optionpk<bool> todo_opt("\0","todo","",false); Optionpk<string> input_opt("i", "input", "input image"); Optionpk<string> training_opt("t", "training", "training shape file. A single shape file contains all training features (must be set as: B0, B1, B2,...) for all classes (class numbers identified by label option). Use multiple training files for bootstrap aggregation (alternative to the bag and bsize options, where a random subset is taken from a single training file)"); - Optionpk<string> label_opt("\0", "label", "identifier for class label in training shape file. (default is label)","label"); - Optionpk<unsigned short> reclass_opt("\0", "rc", "reclass code (e.g. --rc=12 --rc=23 to reclass first two classes to 12 and 23 resp.). Default is 0: do not reclass", 0); - Optionpk<unsigned int> balance_opt("\0", "balance", "balance the input data to this number of samples for each class (default 0: do not balance)", 0); - Optionpk<int> minSize_opt("m", "min", "if number of training pixels is less then min, do not take this class into account (default is 0: consider all classes", 0); + Optionpk<string> label_opt("label", "label", "identifier for class label in training shape file.","label"); + Optionpk<unsigned short> reclass_opt("rc", "rc", "reclass code (e.g. 
--rc=12 --rc=23 to reclass first two classes to 12 and 23 resp.)"); + Optionpk<unsigned int> balance_opt("bal", "balance", "balance the input data to this number of samples for each class", 0); + Optionpk<int> minSize_opt("m", "min", "if number of training pixels is less then min, do not take this class into account (0: consider all classes)", 0); Optionpk<double> start_opt("s", "start", "start band sequence number (set to 0)",0); Optionpk<double> end_opt("e", "end", "end band sequence number (set to 0 for all bands)", 0); Optionpk<short> band_opt("b", "band", "band index (starting from 0, either use band option or use start to end)"); Optionpk<double> offset_opt("\0", "offset", "offset value for each spectral band input features: refl[band]=(DN[band]-offset[band])/scale[band]", 0.0); Optionpk<double> scale_opt("\0", "scale", "scale value for each spectral band input features: refl=(DN[band]-offset[band])/scale[band] (use 0 if scale min and max in each band to -1.0 and 1.0)", 0.0); Optionpk<unsigned short> aggreg_opt("a", "aggreg", "how to combine aggregated classifiers, see also rc option (1: sum rule, 2: max rule).",1); - Optionpk<double> priors_opt("p", "prior", "prior probabilities for each class (e.g., -p 0.3 -p 0.3 -p 0.2 ), default set to equal priors)", 0.0); + Optionpk<double> priors_opt("p", "prior", "prior probabilities for each class (e.g., -p 0.3 -p 0.3 -p 0.2 )", 0.0); Optionpk<unsigned short> cv_opt("cv", "cv", "n-fold cross validation mode",0); Optionpk<unsigned int> nneuron_opt("\0", "nneuron", "number of neurons in hidden layers in neural network (multiple hidden layers are set by defining multiple number of neurons: -n 15 -n 1, default is one hidden layer with 5 neurons)", 5); Optionpk<float> connection_opt("\0", "connection", "connection reate (default: 1.0 for a fully connected network)", 1.0); @@ -186,7 +186,8 @@ int main(int argc, char *argv[]) } if(verbose_opt[0]>=1){ - cout << "image filename: " << input_opt[0] << endl; + 
if(input_opt.size()) + cout << "image filename: " << input_opt[0] << endl; if(mask_opt.size()) cout << "mask filename: " << mask_opt[0] << endl; if(training_opt.size()){ @@ -213,9 +214,10 @@ int main(int argc, char *argv[]) vector< vector<double> > offset(nbag); vector< vector<double> > scale(nbag); + map<string,Vector2d<float> > trainingMap; vector< Vector2d<float> > trainingPixels;//[class][sample][band] - if(reclass_opt.size()>1){ + if(reclass_opt.size()){ vreclass.resize(reclass_opt.size()); for(int iclass=0;iclass<reclass_opt.size();++iclass){ reclassMap[iclass]=reclass_opt[iclass]; @@ -242,8 +244,10 @@ int main(int argc, char *argv[]) for(int ibag=0;ibag<nbag;++ibag){ //organize training data if(ibag<training_opt.size()){//if bag contains new training pixels + trainingMap.clear(); trainingPixels.clear(); - map<int,Vector2d<float> > trainingMap; + // map<int,Vector2d<float> > trainingMap; + // map<string,Vector2d<float> > trainingMap; if(verbose_opt[0]>=1) cout << "reading imageShape file " << training_opt[0] << endl; try{ @@ -264,20 +268,21 @@ int main(int argc, char *argv[]) cerr << "error catched" << std::endl; exit(1); } - //delete class 0 - if(verbose_opt[0]>=1) - cout << "erasing class 0 from training set (" << trainingMap[0].size() << " from " << totalSamples << ") samples" << endl; - totalSamples-=trainingMap[0].size(); - trainingMap.erase(0); + //delete class 0 ? 
+ // if(verbose_opt[0]>=1) + // std::cout << "erasing class 0 from training set (" << trainingMap[0].size() << " from " << totalSamples << ") samples" << std::endl; + // totalSamples-=trainingMap[0].size(); + // trainingMap.erase(0); //convert map to vector short iclass=0; - if(reclass_opt.size()==1){//no reclass option, read classes from shape + if(reclass_opt.empty()){//no reclass option, read classes from shape reclassMap.clear(); vreclass.clear(); } if(verbose_opt[0]>1) - cout << "training pixels: " << endl; - map<int,Vector2d<float> >::iterator mapit=trainingMap.begin(); + std::cout << "training pixels: " << std::endl; + map<string,Vector2d<float> >::iterator mapit=trainingMap.begin(); + // map<int,Vector2d<float> >::iterator mapit=trainingMap.begin(); while(mapit!=trainingMap.end()){ // for(map<int,Vector2d<float> >::const_iterator mapit=trainingMap.begin();mapit!=trainingMap.end();++mapit){ //delete small classes @@ -286,13 +291,14 @@ int main(int argc, char *argv[]) continue; //todo: beware of reclass option: delete this reclass if no samples are left in this classes!! 
} - if(reclass_opt.size()==1){//no reclass option, read classes from shape - reclassMap[iclass]=(mapit->first); - vreclass.push_back(mapit->first); + if(reclass_opt.empty()){//no reclass option, read classes from shape + // reclassMap[iclass]=(mapit->first); + // vreclass.push_back(mapit->first); + vreclass.push_back(iclass); } trainingPixels.push_back(mapit->second); if(verbose_opt[0]>1) - cout << mapit->first << ": " << (mapit->second).size() << " samples" << endl; + std::cout << mapit->first << ": " << (mapit->second).size() << " samples" << std::endl; ++iclass; ++mapit; } @@ -304,7 +310,8 @@ int main(int argc, char *argv[]) assert(nclass==trainingPixels.size()); assert(nband==(training_opt.size())?trainingPixels[0][0].size()-2:trainingPixels[0][0].size()); } - assert(reclassMap.size()==nclass); + // assert(reclassMap.size()==nclass); + assert(vreclass.size()==nclass); //do not remove outliers here: could easily be obtained through ogr2ogr -where 'B2<110' output.shp input.shp //balance training data @@ -483,8 +490,9 @@ int main(int argc, char *argv[]) unsigned int nFeatures=trainingFeatures[0][0].size(); unsigned int ntraining=0; for(int iclass=0;iclass<nclass;++iclass){ - if(verbose_opt[0]>=1) - cout << "training sample size for class " << vcode[iclass] << ": " << trainingFeatures[iclass].size() << endl; + //vcode has size nreclass??? 
+ // if(verbose_opt[0]>=1) + // cout << "training sample size for class " << vcode[iclass] << ": " << trainingFeatures[iclass].size() << endl; ntraining+=trainingFeatures[iclass].size(); } const unsigned int num_layers = nneuron_opt.size()+2; @@ -554,17 +562,48 @@ int main(int argc, char *argv[]) referenceVector, outputVector, verbose_opt[0]); - ConfusionMatrix cm(nclass); + ConfusionMatrix cm; + map<string,Vector2d<float> >::iterator mapit=trainingMap.begin(); + if(reclass_opt.empty()){ + while(mapit!=trainingMap.end()){ + cm.pushBackClassName(mapit->first); + ++mapit; + } + } + else{ + if(verbose_opt[0]>1) + std::cout << "classes for confusion matrix: " << std::endl; + for(int iclass=0;iclass<nreclass;++iclass){ + ostringstream os; + os << vcode[iclass]; + if(verbose_opt[0]>1) + std::cout << os.str() << " "; + cm.pushBackClassName(os.str()); + } + if(verbose_opt[0]>1) + std::cout << std::endl; + } + assert(cm.size()==nreclass); + for(int isample=0;isample<referenceVector.size();++isample) - cm.incrementResult(cm.getClass(referenceVector[isample]),cm.getClass(outputVector[isample]),1); + cm.incrementResult(cm.getClass(vreclass[referenceVector[isample]]),cm.getClass(vreclass[outputVector[isample]]),1); assert(cm.nReference()); std::cout << cm << std::endl; - std::cout << "Kappa: " << cm.kappa() << std::endl; + cout << "class #samples userAcc prodAcc" << endl; + double se95_ua=0; + double se95_pa=0; double se95_oa=0; + double dua=0; + double dpa=0; double doa=0; + for(int iclass=0;iclass<cm.nClasses();++iclass){ + dua=cm.ua_pct(cm.getClass(iclass),&se95_ua); + dpa=cm.pa_pct(cm.getClass(iclass),&se95_pa); + cout << cm.getClass(iclass) << " " << cm.nReference(cm.getClass(iclass)) << " " << dua << " (" << se95_ua << ")" << " " << dpa << " (" << se95_pa << ")" << endl; + } + std::cout << "Kappa: " << cm.kappa() << std::endl; doa=cm.oa_pct(&se95_oa); std::cout << "Overall Accuracy: " << doa << " (" << se95_oa << ")" << std::endl; - std::cout << "rmse cross-validation: " 
<< rmse << std::endl; } if(verbose_opt[0]>=1) @@ -602,7 +641,8 @@ int main(int argc, char *argv[]) } //--------------------------------- end of training ----------------------------------- - + if(input_opt.empty()) + exit(0); //-------------------------------- open image file ------------------------------------ if(input_opt[0].find(".shp")==string::npos){ diff --git a/src/apps/pkclassify_nn.h b/src/apps/pkclassify_nn.h index 583c982..c253e40 100644 --- a/src/apps/pkclassify_nn.h +++ b/src/apps/pkclassify_nn.h @@ -42,6 +42,13 @@ template<typename T> unsigned int readDataImageShape(const string &filename, int verbose=false); template<typename T> unsigned int readDataImageShape(const string &filename, + map<string,Vector2d<T> > &mapPixels, //[classNr][pixelNr][bandNr], + vector<string>& fields, + const vector<short>& bands, + const string& label, + int verbose=false); + +template<typename T> unsigned int readDataImageShape(const string &filename, map<int,Vector2d<T> > &mapPixels, //[classNr][pixelNr][bandNr], vector<string>& fields, const vector<short>& bands, @@ -117,6 +124,82 @@ template<typename T> unsigned int readDataImageShape(const string &filename, return totalSamples; } +template<typename T> unsigned int readDataImageShape(const string &filename, + map<string,Vector2d<T> > &mapPixels, //[classNr][pixelNr][bandNr], + vector<string>& fields, + const vector<short>& bands, + const string& label, + int verbose) +{ + mapPixels.clear(); + int nsample=0; + int totalSamples=0; + int nband=0; + if(verbose) + cout << "reading shape file " << filename << endl; + ImgReaderOgr imgReaderShape; + try{ + imgReaderShape.open(filename); + //only retain bands in fields + imgReaderShape.getFields(fields); + vector<string>::iterator fit=fields.begin(); + if(verbose>1) + cout << "reading fields: "; + while(fit!=fields.end()){ + if(verbose) + cout << *fit << " "; + // size_t pos=(*fit).find_first_not_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_ "); + 
if(((*fit).substr(0,1)=="B")&&((*fit).substr(1).find_first_not_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_ ")!=string::npos)){ + int theBand=atoi((*fit).substr(1).c_str()); + if(bands.size()){ + bool validBand=false; + for(int iband=0;iband<bands.size();++iband){ + if(theBand==bands[iband]) + validBand=true; + } + if(validBand) + ++fit; + else + fields.erase(fit); + } + else + ++fit; + } + else + fields.erase(fit); + } + if(verbose) + cout << endl; + if(verbose){ + cout << "fields:"; + for(vector<string>::iterator fit=fields.begin();fit!=fields.end();++fit) + cout << " " << *fit; + cout << endl; + } + if(!nband){ + if(verbose) + cout << "reading data" << endl; + nband=imgReaderShape.readData(mapPixels,OFTReal,fields,label,0,true,verbose==2); + + } + else + assert(nband==imgReaderShape.readData(mapPixels,OFTReal,fields,label,0,true,false)); + } + catch(string e){ + ostringstream estr; + estr << e << " " << filename; + throw(estr.str()); + } + nsample=imgReaderShape.getFeatureCount(); + totalSamples+=nsample; + if(verbose) + cout << ": " << nsample << " samples read with " << nband << " bands" << endl; + imgReaderShape.close(); + if(verbose) + cout << "total number of samples read " << totalSamples << endl; + return totalSamples; +} + template<typename T> unsigned int readDataImageShape(const string &filename, map<int,Vector2d<T> > &mapPixels, //[classNr][pixelNr][bandNr], @@ -185,4 +268,72 @@ template<typename T> unsigned int readDataImageShape(const string &filename, cout << "total number of samples read " << totalSamples << endl; return totalSamples; } + +template<typename T> unsigned int readDataImageShape(const string &filename, + map<string,Vector2d<T> > &mapPixels, //[classNr][pixelNr][bandNr], + vector<string>& fields, + double start, + double end, + const string& label, + int verbose) +{ + mapPixels.clear(); + int nsample=0; + int totalSamples=0; + int nband=0; + if(verbose) + cout << "reading shape file " << filename << endl; + ImgReaderOgr 
imgReaderShape; + try{ + imgReaderShape.open(filename); + //only retain bands in fields + imgReaderShape.getFields(fields); + vector<string>::iterator fit=fields.begin(); + if(verbose) + cout << "reading fields: "; + while(fit!=fields.end()){ + if(verbose) + cout << *fit << " "; + // size_t pos=(*fit).find_first_not_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_ "); + if(((*fit).substr(0,1)=="B")&&((*fit).substr(1).find_first_not_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_ ")!=string::npos)){ + int iband=atoi((*fit).substr(1).c_str()); + if((start||end)&&(iband<start||iband>end)) + fields.erase(fit); + else + ++fit; + } + else + fields.erase(fit); + } + if(verbose) + cout << endl; + if(verbose){ + cout << "fields:"; + for(vector<string>::iterator fit=fields.begin();fit!=fields.end();++fit) + cout << " " << *fit; + cout << endl; + } + if(!nband){ + if(verbose) + cout << "reading data" << endl; + nband=imgReaderShape.readData(mapPixels,OFTReal,fields,label,0,true,verbose==2); + + } + else + assert(nband==imgReaderShape.readData(mapPixels,OFTReal,fields,label,0,true,false)); + } + catch(string e){ + ostringstream estr; + estr << e << " " << filename; + throw(estr.str()); + } + nsample=imgReaderShape.getFeatureCount(); + totalSamples+=nsample; + if(verbose) + cout << ": " << nsample << " samples read with " << nband << " bands" << endl; + imgReaderShape.close(); + if(verbose) + cout << "total number of samples read " << totalSamples << endl; + return totalSamples; +} #endif //_PKCLASSIFY_NN_H_ diff --git a/src/apps/pkclassify_svm.cc b/src/apps/pkclassify_svm.cc index 9377fc6..2e36ce1 100644 --- a/src/apps/pkclassify_svm.cc +++ b/src/apps/pkclassify_svm.cc @@ -82,8 +82,9 @@ void reclass(const vector<double>& result, const vector<int>& vreclass, const ve int main(int argc, char *argv[]) { - map<short,int> reclassMap; + // map<short,int> reclassMap; vector<int> vreclass; //vreclass: map nclass->nreclass + vector<int> vuniqueclass; vector<double> 
priors; vector<double> priorsReclass; @@ -101,10 +102,10 @@ int main(int argc, char *argv[]) Optionpk<bool> todo_opt("\0","todo","",false); Optionpk<string> input_opt("i", "input", "input image"); Optionpk<string> training_opt("t", "training", "training shape file. A single shape file contains all training features (must be set as: B0, B1, B2,...) for all classes (class numbers identified by label option). Use multiple training files for bootstrap aggregation (alternative to the bag and bsize options, where a random subset is taken from a single training file)"); - Optionpk<string> label_opt("\0", "label", "identifier for class label in training shape file.","label"); - Optionpk<unsigned short> reclass_opt("\0", "rc", "reclass code (e.g. --rc=12 --rc=23 to reclass first two classes to 12 and 23 resp.).", 0); - Optionpk<unsigned int> balance_opt("\0", "balance", "balance the input data to this number of samples for each class", 0); - Optionpk<int> minSize_opt("m", "min", "if number of training pixels is less then min, do not take this class into account", 0); + Optionpk<string> label_opt("label", "label", "identifier for class label in training shape file.","label"); + Optionpk<unsigned short> reclass_opt("rc", "rc", "reclass code (e.g. 
--rc=12 --rc=23 to reclass first two classes to 12 and 23 resp.)"); + Optionpk<unsigned int> balance_opt("bal", "balance", "balance the input data to this number of samples for each class", 0); + Optionpk<int> minSize_opt("m", "min", "if number of training pixels is less then min, do not take this class into account (0: consider all classes)", 0); Optionpk<double> start_opt("s", "start", "start band sequence number (set to 0)",0); Optionpk<double> end_opt("e", "end", "end band sequence number (set to 0 for all bands)", 0); Optionpk<short> band_opt("b", "band", "band index (starting from 0, either use band option or use start to end)"); @@ -112,8 +113,7 @@ int main(int argc, char *argv[]) Optionpk<double> scale_opt("\0", "scale", "scale value for each spectral band input features: refl=(DN[band]-offset[band])/scale[band] (use 0 if scale min and max in each band to -1.0 and 1.0)", 0.0); Optionpk<unsigned short> aggreg_opt("a", "aggreg", "how to combine aggregated classifiers, see also rc option (0: no aggregation, 1: sum rule, 2: max rule).",0); Optionpk<double> priors_opt("p", "prior", "prior probabilities for each class (e.g., -p 0.3 -p 0.3 -p 0.2 )", 0.0); - - + Optionpk<unsigned short> cv_opt("cv", "cv", "n-fold cross validation mode",0); Optionpk<unsigned short> svm_type_opt("svmt", "svmtype", "type of SVM (0: C-SVC, 1: nu-SVC, 2: one-class SVM, 3: epsilon-SVR, 4: nu-SVR)",0); Optionpk<unsigned short> kernel_type_opt("kt", "kerneltype", "type of kernel function (0: linear: u'*v, 1: polynomial: (gamma*u'*v + coef0)^degree, 2: radial basis function: exp(-gamma*|u-v|^2), 3: sigmoid: tanh(gamma*u'*v + coef0), 4: precomputed kernel (kernel values in training_set_file)",2); Optionpk<unsigned short> kernel_degree_opt("kd", "kd", "degree in kernel function",3); @@ -127,7 +127,6 @@ int main(int argc, char *argv[]) Optionpk<bool> shrinking_opt("shrink", "shrink", "whether to use the shrinking heuristics",false); Optionpk<bool> prob_est_opt("pe", "probest", "whether to 
train a SVC or SVR model for probability estimates",false); // Optionpk<bool> weight_opt("wi", "wi", "set the parameter C of class i to weight*C, for C-SVC",true); - Optionpk<unsigned short> cv_opt("cv", "cv", "n-fold cross validation mode",0); Optionpk<unsigned short> comb_opt("c", "comb", "how to combine bootstrap aggregation classifiers (0: sum rule, 1: product rule, 2: max rule). Also used to aggregate classes with rc option.",0); Optionpk<unsigned short> bag_opt("\0", "bag", "Number of bootstrap aggregations", 1); Optionpk<int> bagSize_opt("\0", "bsize", "Percentage of features used from available training features for each bootstrap aggregation", 100); @@ -229,14 +228,10 @@ int main(int argc, char *argv[]) int nband=0; int startBand=2;//first two bands represent X and Y pos - vector< vector<double> > offset(nbag); - vector< vector<double> > scale(nbag); - vector< Vector2d<float> > trainingPixels;//[class][sample][band] - - if(reclass_opt.size()>1){ + if(reclass_opt.size()){ vreclass.resize(reclass_opt.size()); for(int iclass=0;iclass<reclass_opt.size();++iclass){ - reclassMap[iclass]=reclass_opt[iclass]; + // reclassMap[iclass]=reclass_opt[iclass]; vreclass[iclass]=reclass_opt[iclass]; } } @@ -258,15 +253,36 @@ int main(int argc, char *argv[]) std::sort(band_opt.begin(),band_opt.end()); //----------------------------------- Training ------------------------------- + vector< vector<double> > offset(nbag); + vector< vector<double> > scale(nbag); + map<string,Vector2d<float> > trainingMap; + vector< Vector2d<float> > trainingPixels;//[class][sample][band] + vector<struct svm_problem> prob(nbag); vector<struct svm_node *> x_space(nbag); + + //test + // ImgReaderOgr testOgr(training_opt[0]); + // OGRDataSource* testSource=testOgr.getDataSource(); + // OGRLayer *poLayer=testSource->GetLayer(0); + // unsigned long int ifeature=0; + // if(poLayer!=NULL){ + // OGRFeatureDefn *poFDefn = poLayer->GetLayerDefn(); + // OGRFeature *poFeature; + // while( (poFeature = 
poLayer->GetNextFeature()) != NULL ){ + // std::cout << "got feature " << ifeature << std::endl; + // ++ifeature; + // } + // exit(1); + // } + // struct svm_node *x_space; vector<string> fields; for(int ibag=0;ibag<nbag;++ibag){ //organize training data if(ibag<training_opt.size()){//if bag contains new training pixels + trainingMap.clear(); trainingPixels.clear(); - map<int,Vector2d<float> > trainingMap; if(verbose_opt[0]>=1) std::cout << "reading imageShape file " << training_opt[0] << std::endl; try{ @@ -287,20 +303,21 @@ int main(int argc, char *argv[]) cerr << "error catched" << std::endl; exit(1); } - //delete class 0 - if(verbose_opt[0]>=1) - std::cout << "erasing class 0 from training set (" << trainingMap[0].size() << " from " << totalSamples << ") samples" << std::endl; - totalSamples-=trainingMap[0].size(); - trainingMap.erase(0); + //todo: delete class 0 ? + // if(verbose_opt[0]>=1) + // std::cout << "erasing class 0 from training set (" << trainingMap[0].size() << " from " << totalSamples << ") samples" << std::endl; + // totalSamples-=trainingMap[0].size(); + // trainingMap.erase(0); + //convert map to vector short iclass=0; - if(reclass_opt.size()==1){//no reclass option, read classes from shape - reclassMap.clear(); + if(reclass_opt.empty()){//no reclass option, read classes from shape + // reclassMap.clear(); vreclass.clear(); } if(verbose_opt[0]>1) std::cout << "training pixels: " << std::endl; - map<int,Vector2d<float> >::iterator mapit=trainingMap.begin(); + map<string,Vector2d<float> >::iterator mapit=trainingMap.begin(); while(mapit!=trainingMap.end()){ // for(map<int,Vector2d<float> >::const_iterator mapit=trainingMap.begin();mapit!=trainingMap.end();++mapit){ //delete small classes @@ -309,9 +326,10 @@ int main(int argc, char *argv[]) continue; //todo: beware of reclass option: delete this reclass if no samples are left in this classes!! 
} - if(reclass_opt.size()==1){//no reclass option, read classes from shape - reclassMap[iclass]=(mapit->first); - vreclass.push_back(mapit->first); + if(reclass_opt.empty()){//no reclass option, read classes from shape + // reclassMap[iclass]=(mapit->first); + // vreclass.push_back(mapit->first); + vreclass.push_back(iclass); } trainingPixels.push_back(mapit->second); if(verbose_opt[0]>1) @@ -327,7 +345,8 @@ int main(int argc, char *argv[]) assert(nclass==trainingPixels.size()); assert(nband==trainingPixels[0][0].size()-2); } - assert(reclassMap.size()==nclass); + // assert(reclassMap.size()==nclass); + assert(vreclass.size()==nclass); //do not remove outliers here: could easily be obtained through ogr2ogr -where 'B2<110' output.shp input.shp //balance training data @@ -438,7 +457,7 @@ int main(int argc, char *argv[]) std::cout << std::endl; } - vector<int> vuniqueclass=vreclass; + vuniqueclass=vreclass; //remove duplicate elements from vuniqueclass sort( vuniqueclass.begin(), vuniqueclass.end() ); vuniqueclass.erase( unique( vuniqueclass.begin(), vuniqueclass.end() ), vuniqueclass.end() ); @@ -507,8 +526,9 @@ int main(int argc, char *argv[]) unsigned int nFeatures=trainingFeatures[0][0].size(); unsigned int ntraining=0; for(int iclass=0;iclass<nclass;++iclass){ - if(verbose_opt[0]>=1) - std::cout << "training sample size for class " << vcode[iclass] << ": " << trainingFeatures[iclass].size() << std::endl; + //vcode has size nreclass??? 
+ // if(verbose_opt[0]>=1) + // std::cout << "training sample size for class " << vcode[iclass] << ": " << trainingFeatures[iclass].size() << std::endl; ntraining+=trainingFeatures[iclass].size(); } // vector<struct svm_problem> prob(ibag); @@ -521,14 +541,10 @@ int main(int argc, char *argv[]) int lIndex=0; for(int iclass=0;iclass<nclass;++iclass){ for(int isample=0;isample<trainingFeatures[iclass].size();++isample){ - // //test - // std::cout << iclass; prob[ibag].x[lIndex]=&(x_space[ibag][spaceIndex]); for(int ifeature=0;ifeature<nFeatures;++ifeature){ x_space[ibag][spaceIndex].index=ifeature+1; x_space[ibag][spaceIndex].value=trainingFeatures[iclass][isample][ifeature]; - // //test - // std::cout << " " << x_space[ibag][spaceIndex].index << ":" << x_space[ibag][spaceIndex].value; ++spaceIndex; } x_space[ibag][spaceIndex++].index=-1; @@ -566,41 +582,65 @@ int main(int argc, char *argv[]) if(verbose_opt[0]>1) std::cout << "SVM is now trained" << std::endl; if(cv_opt[0]>0){ - std::cout << "Confusion matrix" << std::endl; - ConfusionMatrix cm(nclass); - // for(int iclass=0;iclass<nclass;++iclass) - // cm.pushBackClassName(type2string(iclass)); + //todo: implement reclassification + // ConfusionMatrix cm(nclass); + ConfusionMatrix cm; + map<string,Vector2d<float> >::iterator mapit=trainingMap.begin(); + if(reclass_opt.empty()){ + while(mapit!=trainingMap.end()){ + cm.pushBackClassName(mapit->first); + ++mapit; + } + } + else{ + if(verbose_opt[0]>1) + std::cout << "classes for confusion matrix: " << std::endl; + for(int iclass=0;iclass<nreclass;++iclass){ + ostringstream os; + os << vcode[iclass]; + if(verbose_opt[0]>1) + std::cout << os.str() << " "; + cm.pushBackClassName(os.str()); + } + if(verbose_opt[0]>1) + std::cout << std::endl; + } + assert(cm.size()==nreclass); + double *target = Malloc(double,prob[ibag].l); svm_cross_validation(&prob[ibag],¶m[ibag],cv_opt[0],target); assert(param[ibag].svm_type != EPSILON_SVR&¶m[ibag].svm_type != NU_SVR);//only for 
regression - int total_correct=0; + for(int i=0;i<prob[ibag].l;i++) - cm.incrementResult(cm.getClass(prob[ibag].y[i]),cm.getClass(target[i]),1); + cm.incrementResult(cm.getClass(vreclass[prob[ibag].y[i]]),cm.getClass(vreclass[target[i]]),1); assert(cm.nReference()); std::cout << cm << std::endl; - std::cout << "Kappa: " << cm.kappa() << std::endl; + cout << "class #samples userAcc prodAcc" << endl; + double se95_ua=0; + double se95_pa=0; double se95_oa=0; + double dua=0; + double dpa=0; double doa=0; + for(int iclass=0;iclass<cm.nClasses();++iclass){ + dua=cm.ua_pct(cm.getClass(iclass),&se95_ua); + dpa=cm.pa_pct(cm.getClass(iclass),&se95_pa); + cout << cm.getClass(iclass) << " " << cm.nReference(cm.getClass(iclass)) << " " << dua << " (" << se95_ua << ")" << " " << dpa << " (" << se95_pa << ")" << endl; + } + std::cout << "Kappa: " << cm.kappa() << std::endl; doa=cm.oa_pct(&se95_oa); std::cout << "Overall Accuracy: " << doa << " (" << se95_oa << ")" << std::endl; free(target); } - // *NOTE* Because svm_model contains pointers to svm_problem, you can // not free the memory used by svm_problem if you are still using the // svm_model produced by svm_train(). 
- - // free(prob.y); - // free(prob.x); - // free(x_space); - // svm_destroy_param(¶m); }//for ibag //--------------------------------- end of training ----------------------------------- - if(!output_opt.size()) + if(input_opt.empty()) exit(0); - const char* pszMessage; void* pProgressArg=NULL; GDALProgressFunc pfnProgress=GDALTermProgress; diff --git a/src/imageclasses/ImgReaderOgr.h b/src/imageclasses/ImgReaderOgr.h index 5863420..2ead995 100644 --- a/src/imageclasses/ImgReaderOgr.h +++ b/src/imageclasses/ImgReaderOgr.h @@ -49,6 +49,7 @@ public: template <typename T> int readData(vector<T>& data, const OGRFieldType& fieldType, const string& theField, int layer=0, bool verbose=false); template <typename T> int readData(Vector2d<T>& data, const OGRFieldType& fieldType, vector<string>& fields, int layer=0, bool pos=false, bool verbose=false);//default layer 0 and no pos information in data template <typename T> int readData(map<int,Vector2d<T> >& data, const OGRFieldType& fieldType, vector<string>& fields, const string& label, int layer=0, bool pos=false, bool verbose=false);//default layer 0 and no pos information in data + template <typename T> int readData(map<string,Vector2d<T> >& data, const OGRFieldType& fieldType, vector<string>& fields, const string& label, int layer=0, bool pos=false, bool verbose=false);//default layer 0 and no pos information in data void shape2ascii(ostream& theOstream, const string& pointname, int layer=0, bool verbose=false); unsigned long int getFeatureCount(int layer=0) const; int getFieldCount(int layer=0) const; @@ -207,6 +208,141 @@ template <typename T> int ImgReaderOgr::readData(map<int,Vector2d<T> >& data, co } }
+//read data from all features in a map, organized by class names
+//data: output, one feature vector is appended per feature under the key read from the label field
+//fieldType: OFTReal or OFTInteger, any other type makes the function throw a string
+//fields: names of the attributes to read; if empty it is resized to the number of layer fields and filled with the names read
+//label: name of the attribute that holds the class name
+//layer: index of the layer to read, a negative value selects the last layer
+//pos: if true, x and y (point position or polygon centroid) are stored in front of the attribute values
+//verbose: if true, progress information is written to cout/cerr
+//returns fields.size() (0 if data is empty), throws a string if the layer is missing or the geometry/field type is not supported
+template <typename T> int ImgReaderOgr::readData(map<string,Vector2d<T> >& data, const OGRFieldType& fieldType, vector<string>& fields, const string& label, int layer, bool pos, bool verbose)
+{
+  if(layer<0)
+    layer=m_datasource->GetLayerCount()-1;
+  assert(m_datasource->GetLayerCount()>layer);
+  if(verbose)
+    cout << "number of layers: " << m_datasource->GetLayerCount() << endl;
+  OGRLayer *poLayer = m_datasource->GetLayer(layer);
+  if(poLayer==NULL){
+    ostringstream ess;
+    ess << "no layer in " << m_filename;
+    throw(ess.str());
+  }
+  OGRFeatureDefn *poFDefn = poLayer->GetLayerDefn();
+  if(fields.empty()){
+    fields.resize(poFDefn->GetFieldCount());
+    if(verbose)
+      cout << "resized fields to " << fields.size() << endl;
+  }
+  //start reading features from the layer
+  if(verbose)
+    cout << "reset reading" << endl;
+  poLayer->ResetReading();
+  unsigned long int ifeature=0;
+  if(verbose)
+    cout << "going through features to fill in string map" << endl << flush;
+  string theClass;
+  OGRFeature *poFeature;
+  while( (poFeature = poLayer->GetNextFeature()) != NULL ){
+    vector<T> theFeature;//x,y (only if pos) followed by the selected fields
+    if(verbose)
+      cout << "reading feature " << ifeature << endl << flush;
+    OGRGeometry *poGeometry = poFeature->GetGeometryRef();
+    if(verbose){
+      if(poGeometry == NULL)
+        cerr << "no geometry defined" << endl << flush;
+      else if(wkbFlatten(poGeometry->getGeometryType()) != wkbPoint)
+        cerr << "Warning: poGeometry type: " << wkbFlatten(poGeometry->getGeometryType()) << endl << flush;
+    }
+    assert(poGeometry != NULL );
+    if(pos){
+      if(wkbFlatten(poGeometry->getGeometryType()) == wkbPoint){
+        //only treat the geometry as a point once its type has been checked
+        OGRPoint *poPoint = (OGRPoint *) poGeometry;
+        theFeature.push_back(poPoint->getX());
+        theFeature.push_back(poPoint->getY());
+      }
+      else if(wkbFlatten(poGeometry->getGeometryType()) == wkbPolygon){
+        //the centroid is written to a local point: the polygon itself must not be used as the output point
+        OGRPolygon *poPolygon = (OGRPolygon *) poGeometry;
+        OGRPoint centroid;
+        poPolygon->Centroid(&centroid);
+        theFeature.push_back(centroid.getX());
+        theFeature.push_back(centroid.getY());
+      }
+      else{
+        //release the feature before leaving the loop through the exception
+        OGRFeature::DestroyFeature(poFeature);
+        string errorstring="Error: Centroid for non polygon geometry not supported until OGR 1.8.0, change ImgReaderOgr if version >= 1.8.0 is installed...";
+        throw(errorstring);
+      }
+    }
+    for(int iField=0;iField<poFDefn->GetFieldCount();++iField){
+      OGRFieldDefn *poFieldDefn = poFDefn->GetFieldDefn(iField);
+      string fieldname=poFieldDefn->GetNameRef();
+      if(fieldname==label){
+        //the label field provides the map key and is not stored in the feature vector
+        theClass=poFeature->GetFieldAsString(iField);
+        if(verbose)
+          std::cout << "read feature for " << theClass << std::endl;
+      }
+      else{
+        switch(fieldType){
+        case(OFTReal):
+        case(OFTInteger):
+          //both supported types are read as double
+          if(fields.size()<poFDefn->GetFieldCount()){
+            //a subset of fields was requested: only read the fields that are listed
+            if(find(fields.begin(),fields.end(),fieldname)!=fields.end())
+              theFeature.push_back(poFeature->GetFieldAsDouble(iField));
+          }
+          else{
+            fields[iField]=fieldname;
+            theFeature.push_back(poFeature->GetFieldAsDouble(iField));
+          }
+          break;
+        default:
+          {
+            OGRFeature::DestroyFeature(poFeature);
+            string errorstring="field type not supported in ImgReaderOgr::ReadData";
+            throw(errorstring);
+          }
+          break;
+        }
+      }
+    }
+    data[theClass].push_back(theFeature);
+    //the feature returned by GetNextFeature() is owned by the caller and must be destroyed
+    OGRFeature::DestroyFeature(poFeature);
+    //count each feature exactly once
+    ++ifeature;
+  }
+  if(verbose)
+    cout << "number of features read: " << ifeature << endl << flush;
+  typename map<string,Vector2d<T> >::const_iterator mit=data.begin();
+  int nband=0;
+  if(verbose)
+    cout << "read classes: " << flush;
+  //NOTE(review): if fields was passed in empty and label is a layer field, fields.size() also counts the label field,
+  //whose value is not stored in the feature vectors, so the asserts below look like they would fail - confirm
+  while(mit!=data.end()){
+    if(verbose)
+      cout << mit->first << " " << flush;
+    if(!nband)
+      nband=fields.size();
+    if(pos)
+      assert((mit->second)[0].size()==nband+2);
+    else
+      assert((mit->second)[0].size()==nband);
+    ++mit;
+  }
+  if(verbose)
+    cout << endl << flush;
+  return(nband);
+}
+ //read x positions template <typename T> int ImgReaderOgr::readXY(vector<T>& xVector, vector<T>& yVector, int layer, bool verbose){ if(layer<0) -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-grass/pktools.git _______________________________________________ Pkg-grass-devel mailing list Pkg-grass-devel@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-grass-devel