This is an automated email from the git hooks/post-receive script.

sebastic-guest pushed a commit to branch upstream-master
in repository pktools.
commit 58ca0504e03ac3c31988813ec053b4ad3225a71f
Author: Pieter Kempeneers <kempe...@gmail.com>
Date:   Fri Sep 5 11:12:53 2014 +0200

    pkfssvm and pkoptsvm update, make use of CostFactory class
---
 configure.ac                 |   3 +-
 qt/pkcrop_gui/mainwindow.cc  |   8 +-
 src/algorithms/CostFactory.h |   8 +-
 src/algorithms/Makefile.am   |   4 +-
 src/apps/Makefile.am         |   4 +-
 src/apps/pkfsann.cc          |   4 +-
 src/apps/pkfssvm.cc          | 328 +++++++++++++++++++-------------------
 src/apps/pkfssvm.h           |  57 -------
 src/apps/pkoptsvm.cc         | 363 ++++++++++++++++++++++++-------------------
 9 files changed, 377 insertions(+), 402 deletions(-)

diff --git a/configure.ac b/configure.ac
index 99895c2..58dbe46 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,6 +1,7 @@
 AC_INIT([pktools], [2.5.3], [kempe...@gmail.com])
 #AM_INIT_AUTOMAKE([-Wall -Werror foreign])
 AM_INIT_AUTOMAKE([-Wall -Wno-extra-portability foreign])
+#AM_INIT_AUTOMAKE([subdir-objects]) #not working due to bug in autoconf, see Debian list: Bug #752993)
 AC_CONFIG_MACRO_DIR([m4])

 AX_LIB_GDAL()
@@ -96,7 +97,7 @@ AC_SUBST([LIBS])
 # For information on how to properly maintain the library version information,
 # refer to the libtool manual, section "Updating library version information":
 # http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html
-AC_SUBST([PKTOOLS_SO_VERSION], [1:0:0])
+AC_SUBST([PKTOOLS_SO_VERSION], [1:1:0])

 # files to generate via autotools (.am or .in source files)
 AC_CONFIG_HEADERS([config.h])

diff --git a/qt/pkcrop_gui/mainwindow.cc b/qt/pkcrop_gui/mainwindow.cc
index 5b1889a..e77f3d5 100644
--- a/qt/pkcrop_gui/mainwindow.cc
+++ b/qt/pkcrop_gui/mainwindow.cc
@@ -257,14 +257,16 @@ void MainWindow::on_toolButton_Run_clicked()

   ui->commandLineEdit->insert(program);

-//  QProcess *myProcess = new QProcess(parent);
+  // QProcess *myProcess = new QProcess(parent);
   QProcess *myProcess = new QProcess(this);
   myProcess->start(program);
   myProcess->setProcessChannelMode(QProcess::MergedChannels);
+  this->setCursor(Qt::WaitCursor);
   myProcess->waitForFinished(-1);
-  QString p_stderr = myProcess->readyReadStandardError();
+  this->setCursor(Qt::ArrowCursor);
+  QMessageBox msgBox;
+  QString p_stderr = myProcess->readAllStandardError();
   if(!p_stderr.isEmpty()){
-    QMessageBox msgBox;
     msgBox.setText(p_stderr);
     msgBox.exec();
   }
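Besides adding the wait cursor, the mainwindow.cc hunk fixes a real bug: QProcess::readyReadStandardError() is only a notification signal and carries no data, whereas readAllStandardError() actually returns the buffered standard-error output. Reassembled from the + and context lines above, the resulting block reads (a sketch of the post-commit code, not a verbatim copy of the file):

    QProcess *myProcess = new QProcess(this);
    myProcess->start(program);
    myProcess->setProcessChannelMode(QProcess::MergedChannels);
    this->setCursor(Qt::WaitCursor);   // busy cursor while the external tool runs
    myProcess->waitForFinished(-1);    // block with no timeout until it exits
    this->setCursor(Qt::ArrowCursor);
    QMessageBox msgBox;
    // readAllStandardError() returns the buffered stderr;
    // readyReadStandardError() is a signal and returns nothing useful.
    QString p_stderr = myProcess->readAllStandardError();
    if(!p_stderr.isEmpty()){
      msgBox.setText(p_stderr);
      msgBox.exec();
    }

Note that with QProcess::MergedChannels the child's stderr is forwarded to the standard-output channel, so the stderr buffer read here can still be empty; the hunk keeps the merged mode from the original code.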
diff --git a/src/algorithms/CostFactory.h b/src/algorithms/CostFactory.h
index 82d3a12..441279e 100644
--- a/src/algorithms/CostFactory.h
+++ b/src/algorithms/CostFactory.h
@@ -38,8 +38,10 @@ public:
   std::map<std::string,short> getClassValueMap(){return m_classValueMap;};
   std::vector<std::string> getNameVector(){return m_nameVector;};
   void setNameVector(std::vector<std::string>& nameVector){m_nameVector=nameVector;};
-  unsigned short getClassIndex(std::string classname) const {return m_cm.getClassIndex(classname);};
+  int getClassIndex(std::string classname) const {return m_cm.getClassIndex(classname);};
+  //pushBackClassName is for confusion matrix
   void pushBackClassName(std::string classname){m_cm.pushBackClassName(classname,true);};//doSort=true
+  //pushBackName is for nameVector in CostFactory
   void pushBackName(std::string classname){m_nameVector.push_back(classname);};
   void setNcTraining(const std::vector<unsigned int> nctraining){m_nctraining=nctraining;};
   void setNcTest(const std::vector<unsigned int> nctest){m_nctest=nctest;};
@@ -53,8 +55,8 @@ protected:
   std::vector<unsigned int> m_nctraining;
   std::vector<unsigned int> m_nctest;
   unsigned short m_cv;
-  std::string m_classname;
+  /* std::string m_classname; */
   short m_classvalue;
   short m_verbose;
 };
-#endif /* _FEATURESELECTOR_H_ */
+#endif

diff --git a/src/algorithms/Makefile.am b/src/algorithms/Makefile.am
index ab568ab..e25cf61 100644
--- a/src/algorithms/Makefile.am
+++ b/src/algorithms/Makefile.am
@@ -25,7 +25,7 @@ libalgorithms_ladir = $(includedir)/pktools/algorithms
 libalgorithms_la_LDFLAGS = -version-info $(PKTOOLS_SO_VERSION) $(AM_LDFLAGS)

 # the list of header files that belong to the library (to be installed later)
-libalgorithms_la_HEADERS = Egcs.h Filter2d.h Filter.h StatFactory.h ConfusionMatrix.h svm.h CostFactory.h FeatureSelector.h
+libalgorithms_la_HEADERS = Egcs.h Filter2d.h Filter.h StatFactory.h ConfusionMatrix.h svm.h CostFactory.h CostFactorySVM.h FeatureSelector.h

 if USE_FANN
 libalgorithms_la_HEADERS += myfann_cpp.h
@@ -36,7 +36,7 @@ libalgorithms_la_HEADERS += OptFactory.h
 endif

 # the sources to add to the library and to add to the source distribution
-libalgorithms_la_SOURCES = $(libalgorithms_la_HEADERS) Egcs.cc Filter2d.cc Filter.cc ConfusionMatrix.cc svm.cpp
+libalgorithms_la_SOURCES = $(libalgorithms_la_HEADERS) Egcs.cc Filter2d.cc Filter.cc ConfusionMatrix.cc svm.cpp CostFactorySVM.cc

###############################################################################
 # list of sources for the binaries

diff --git a/src/apps/Makefile.am b/src/apps/Makefile.am
index 6aeeeae..d2d7bba 100644
--- a/src/apps/Makefile.am
+++ b/src/apps/Makefile.am
@@ -31,7 +31,7 @@ pklas2img_LDADD = $(top_srcdir)/src/lasclasses/liblasClasses.la -llas $(AM_LDFLA
 endif
 if USE_NLOPT
 bin_PROGRAMS += pkoptsvm
-pkoptsvm_SOURCES = $(top_srcdir)/src/algorithms/OptFactory.h pkoptsvm.cc
+pkoptsvm_SOURCES = $(top_srcdir)/src/algorithms/OptFactory.h $(top_srcdir)/src/algorithms/CostFactorySVM.h pkoptsvm.cc
 pkoptsvm_LDADD = $(GSL_LIBS) $(AM_LDFLAGS) -lnlopt
 endif
 # list of sources for the binaries
@@ -66,7 +66,7 @@ pkcomposite_SOURCES = pkcomposite.cc
 pkndvi_SOURCES = pkndvi.cc
 pkpolygonize_SOURCES = pkpolygonize.cc
 pksvm_SOURCES = $(top_srcdir)/src/algorithms/svm.h $(top_srcdir)/src/algorithms/svm.cpp pksvm.cc
-pkfssvm_SOURCES = $(top_srcdir)/src/algorithms/svm.h $(top_srcdir)/src/algorithms/FeatureSelector.h $(top_srcdir)/src/algorithms/CostFactory.h $(top_srcdir)/src/algorithms/svm.cpp pkfssvm.h pkfssvm.cc
+pkfssvm_SOURCES = $(top_srcdir)/src/algorithms/svm.h $(top_srcdir)/src/algorithms/FeatureSelector.h $(top_srcdir)/src/algorithms/CostFactorySVM.h $(top_srcdir)/src/algorithms/svm.cpp pkfssvm.cc
 pkfssvm_LDADD = $(GSL_LIBS) $(AM_LDFLAGS) -lalgorithms
 pkascii2img_SOURCES = pkascii2img.cc
 pkascii2ogr_SOURCES = pkascii2ogr.cc
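CostFactory.h is the abstract base that the rest of this commit builds on: getClassIndex() now returns int so that a missing class can be signalled with a negative value (the pkoptsvm.cc code below checks for <0), and the two push-back methods are documented as feeding the confusion matrix and the name vector respectively. A minimal, hypothetical subclass illustrating the contract (the class name and body are placeholders; CostFactorySVM.cc added by this commit is the real implementation):

    #include <string>
    #include <vector>
    #include "base/Vector2d.h"
    #include "algorithms/CostFactory.h"

    class CostFactoryDummy : public CostFactory {
    public:
      // the base constructor takes cross-validation folds and verbosity,
      // as the CostFactory(cv,verbose) calls elsewhere in this commit show
      CostFactoryDummy(unsigned short cv, short verbose) : CostFactory(cv,verbose){};
      // getCost trains/evaluates on the selected features and returns the
      // figure of merit; a real subclass fills the protected confusion
      // matrix m_cm and returns e.g. m_cm.kappa()
      double getCost(const std::vector<Vector2d<float> > &trainingFeatures){
        return 0.0; // placeholder
      };
    };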
diff --git a/src/apps/pkfsann.cc b/src/apps/pkfsann.cc
index a28faa1..9bee67d 100644
--- a/src/apps/pkfsann.cc
+++ b/src/apps/pkfsann.cc
@@ -174,11 +174,11 @@ int main(int argc, char *argv[])
   Optionpk<string> input_opt("i", "input", "input test set (leave empty to perform a cross validation based on training only)");
   Optionpk<string> training_opt("t", "training", "training vector file. A single vector file contains all training features (must be set as: B0, B1, B2,...) for all classes (class numbers identified by label option). Use multiple training files for bootstrap aggregation (alternative to the bag and bsize options, where a random subset is taken from a single training file)");
   Optionpk<string> tlayer_opt("tln", "tln", "training layer name(s)");
-  Optionpk<string> label_opt("\0", "label", "identifier for class label in training vector file.","label");
+  Optionpk<string> label_opt("label", "label", "identifier for class label in training vector file.","label");
   Optionpk<unsigned short> maxFeatures_opt("n", "nf", "number of features to select (0 to select optimal number, see also ecost option)", 0);
   Optionpk<unsigned int> balance_opt("\0", "balance", "balance the input data to this number of samples for each class", 0);
   Optionpk<bool> random_opt("random","random", "in case of balance, randomize input data", true);
-  Optionpk<int> minSize_opt("m", "min", "if number of training pixels is less then min, do not take this class into account", 0);
+  Optionpk<int> minSize_opt("min", "min", "if number of training pixels is less then min, do not take this class into account", 0);
   Optionpk<double> start_opt("s", "start", "start band sequence number",0);
   Optionpk<double> end_opt("e", "end", "end band sequence number (set to 0 to include all bands)", 0);
   Optionpk<short> band_opt("b", "band", "band index (starting from 0, either use band option or use start to end)");
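In pkfsann.cc (and in pkfssvm.cc and pkoptsvm.cc below) only the option keys change. Judging from the surrounding calls, Optionpk's first constructor argument is the short option key and the second the long key, with "\0" apparently standing for "no short key"; the commit gives label_opt and minSize_opt short keys that coincide with their long names, presumably to avoid single-letter keys colliding with other options. The declaration pattern, with arguments taken verbatim from the hunk above:

    // Optionpk<T>(short key, long key, description, optional default)
    Optionpk<string> label_opt("label", "label",
        "identifier for class label in training vector file.", "label");
    Optionpk<int> minSize_opt("min", "min",
        "if number of training pixels is less then min, do not take this class into account", 0);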
diff --git a/src/apps/pkfssvm.cc b/src/apps/pkfssvm.cc
index 7d5fb50..3a8a222 100644
--- a/src/apps/pkfssvm.cc
+++ b/src/apps/pkfssvm.cc
@@ -24,11 +24,10 @@ along with pktools. If not, see <http://www.gnu.org/licenses/>.
 #include <algorithm>
 #include "base/Optionpk.h"
 #include "algorithms/ConfusionMatrix.h"
-#include "algorithms/CostFactory.h"
+#include "algorithms/CostFactorySVM.h"
 #include "algorithms/FeatureSelector.h"
 #include "algorithms/svm.h"
 #include "imageclasses/ImgReaderOgr.h"
-#include "pkfssvm.h"

 #ifdef HAVE_CONFIG_H
 #include <config.h>
@@ -36,169 +35,162 @@ along with pktools. If not, see <http://www.gnu.org/licenses/>.

 using namespace std;

-#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
-
-//global parameters used in cost function getCost
-// ConfusionMatrix cm;
-// map<string,short> classValueMap;
-// vector<std::string> nameVector;
-// vector<unsigned int> nctraining;
-// vector<unsigned int> nctest;
-
-CostFactorySVM::CostFactorySVM()
-  : CostFactory(2,0), m_svm_type("C_SVC"), m_kernel_type("radial"), m_kernel_degree(3), m_gamma(1.0), m_coef0(0), m_ccost(1000), m_nu(0.5), m_epsilon_loss(100), m_cache(100), m_epsilon_tol(0.001), m_shrinking(false), m_prob_est(true){
-}
-
-CostFactorySVM::~CostFactorySVM(){
-}
-
-CostFactorySVM::CostFactorySVM(std::string svm_type, std::string kernel_type, unsigned short kernel_degree, float gamma, float coef0, float ccost, float nu, float epsilon_loss, int cache, float epsilon_tol, bool shrinking, bool prob_est, unsigned short cv, bool verbose)
-  : CostFactory(cv,verbose), m_svm_type(svm_type), m_kernel_type(kernel_type), m_kernel_degree(kernel_degree), m_gamma(gamma), m_coef0(coef0), m_ccost(ccost), m_nu(nu), m_epsilon_loss(epsilon_loss), m_cache(cache), m_epsilon_tol(epsilon_tol), m_shrinking(shrinking), m_prob_est(prob_est){};
-
-double CostFactorySVM::getCost(const vector<Vector2d<float> > &trainingFeatures){
-  std::map<std::string, svm::SVM_TYPE> svmMap;
-
-  svmMap["C_SVC"]=svm::C_SVC;
-  svmMap["nu_SVC"]=svm::nu_SVC;
-  svmMap["one_class"]=svm::one_class;
-  svmMap["epsilon_SVR"]=svm::epsilon_SVR;
-  svmMap["nu_SVR"]=svm::nu_SVR;
-
-  std::map<std::string, svm::KERNEL_TYPE> kernelMap;
-
-  kernelMap["linear"]=svm::linear;
-  kernelMap["polynomial"]=svm::polynomial;
-  kernelMap["radial"]=svm::radial;
-  kernelMap["sigmoid;"]=svm::sigmoid;
-
-  unsigned short nclass=trainingFeatures.size();
-  unsigned int ntraining=0;
-  unsigned int ntest=0;
-  for(int iclass=0;iclass<nclass;++iclass){
-    ntraining+=m_nctraining[iclass];
-    ntest+=m_nctest[iclass];
-  }
-  if(ntest)
-    assert(!m_cv);
-  if(!m_cv)
-    assert(ntest);
-  unsigned short nFeatures=trainingFeatures[0][0].size();
-
-  struct svm_parameter param;
-  param.svm_type = svmMap[m_svm_type];
-  param.kernel_type = kernelMap[m_kernel_type];
-  param.degree = m_kernel_degree;
-  param.gamma = (m_gamma>0)? m_gamma : 1.0/nFeatures;
-  param.coef0 = m_coef0;
-  param.nu = m_nu;
-  param.cache_size = m_cache;
-  param.C = m_ccost;
-  param.eps = m_epsilon_tol;
-  param.p = m_epsilon_loss;
-  param.shrinking = (m_shrinking)? 1 : 0;
-  param.probability = (m_prob_est)? 1 : 0;
-  param.nr_weight = 0;//not used: I use priors and balancing
-  param.weight_label = NULL;
-  param.weight = NULL;
-  param.verbose=(m_verbose>1)? true:false;
-  struct svm_model* svm;
-  struct svm_problem prob;
-  struct svm_node* x_space;
-
-  prob.l=ntraining;
-  prob.y = Malloc(double,prob.l);
-  prob.x = Malloc(struct svm_node *,prob.l);
-  x_space = Malloc(struct svm_node,(nFeatures+1)*ntraining);
-  unsigned long int spaceIndex=0;
-  int lIndex=0;
-  for(int iclass=0;iclass<nclass;++iclass){
-    // for(int isample=0;isample<trainingFeatures[iclass].size();++isample){
-    for(int isample=0;isample<m_nctraining[iclass];++isample){
-      prob.x[lIndex]=&(x_space[spaceIndex]);
-      for(int ifeature=0;ifeature<nFeatures;++ifeature){
-        x_space[spaceIndex].index=ifeature+1;
-        x_space[spaceIndex].value=trainingFeatures[iclass][isample][ifeature];
-        ++spaceIndex;
-      }
-      x_space[spaceIndex++].index=-1;
-      prob.y[lIndex]=iclass;
-      ++lIndex;
-    }
-  }
-
-  assert(lIndex==prob.l);
-  if(m_verbose>2)
-    std::cout << "checking parameters" << std::endl;
-  svm_check_parameter(&prob,&param);
-  if(m_verbose>2)
-    std::cout << "parameters ok, training" << std::endl;
-  svm=svm_train(&prob,&param);
-  if(m_verbose>2)
-    std::cout << "SVM is now trained" << std::endl;
-
-  m_cm.clearResults();
-  if(m_cv>1){
-    double *target = Malloc(double,prob.l);
-    svm_cross_validation(&prob,&param,m_cv,target);
-    assert(param.svm_type != EPSILON_SVR && param.svm_type != NU_SVR);//only for regression
-    for(int i=0;i<prob.l;i++){
-      string refClassName=m_nameVector[prob.y[i]];
-      string className=m_nameVector[target[i]];
-      if(m_classValueMap.size())
-        m_cm.incrementResult(type2string<short>(m_classValueMap[refClassName]),type2string<short>(m_classValueMap[className]),1.0);
-      else
-        m_cm.incrementResult(m_cm.getClass(prob.y[i]),m_cm.getClass(target[i]),1.0);
-    }
-    free(target);
-  }
-  else{
-    struct svm_node *x_test;
-    vector<double> result(nclass);
-    x_test = Malloc(struct svm_node,(nFeatures+1));
-    for(int iclass=0;iclass<nclass;++iclass){
-      for(int isample=0;isample<m_nctest[iclass];++isample){
-        for(int ifeature=0;ifeature<nFeatures;++ifeature){
-          x_test[ifeature].index=ifeature+1;
-          x_test[ifeature].value=trainingFeatures[iclass][m_nctraining[iclass]+isample][ifeature];
-        }
-        x_test[nFeatures].index=-1;
-        double predict_label=0;
-        assert(svm_check_probability_model(svm));
-        predict_label = svm_predict_probability(svm,x_test,&(result[0]));
-        // predict_label = svm_predict(svm,x_test);
-        string refClassName=m_nameVector[iclass];
-        string className=m_nameVector[static_cast<short>(predict_label)];
-        if(m_classValueMap.size())
-          m_cm.incrementResult(type2string<short>(m_classValueMap[refClassName]),type2string<short>(m_classValueMap[className]),1.0);
-        else
-          m_cm.incrementResult(refClassName,className,1.0);
-      }
-    }
-    free(x_test);
-  }
-  if(m_verbose>1)
-    std::cout << m_cm << std::endl;
-  assert(m_cm.nReference());
-  // if(m_verbose)
-
-  //   std::cout << m_cm << std::endl;
-  //   std::cout << "Kappa: " << m_cm.kappa() << std::endl;
-  //   double se95_oa=0;
-  //   double doa=0;
-  //   doa=m_cm.oa_pct(&se95_oa);
-  //   std::cout << "Overall Accuracy: " << doa << " (" << se95_oa << ")" << std::endl;
-
-  // *NOTE* Because svm_model contains pointers to svm_problem, you can
-  // not free the memory used by svm_problem if you are still using the
-  // svm_model produced by svm_train().
-  // however, we will re-train the svm later on after the feature selection
-  free(prob.y);
-  free(prob.x);
-  free(x_space);
-  svm_free_and_destroy_model(&(svm));
-
-  return(m_cm.kappa());
-}
+enum SelectorValue { NA=0, SFFS=1, SFS=2, SBS=3, BFS=4};
+
+// CostFactorySVM::CostFactorySVM()
+//   : CostFactory(2,0), m_svm_type("C_SVC"), m_kernel_type("radial"), m_kernel_degree(3), m_gamma(1.0), m_coef0(0), m_ccost(1000), m_nu(0.5), m_epsilon_loss(100), m_cache(100), m_epsilon_tol(0.001), m_shrinking(false), m_prob_est(true){
+// }
+
+// CostFactorySVM::~CostFactorySVM(){
+// }
+
+// CostFactorySVM::CostFactorySVM(std::string svm_type, std::string kernel_type, unsigned short kernel_degree, float gamma, float coef0, float ccost, float nu, float epsilon_loss, int cache, float epsilon_tol, bool shrinking, bool prob_est, unsigned short cv, bool verbose)
+//   : CostFactory(cv,verbose), m_svm_type(svm_type), m_kernel_type(kernel_type), m_kernel_degree(kernel_degree), m_gamma(gamma), m_coef0(coef0), m_ccost(ccost), m_nu(nu), m_epsilon_loss(epsilon_loss), m_cache(cache), m_epsilon_tol(epsilon_tol), m_shrinking(shrinking), m_prob_est(prob_est){};
+
+// double CostFactorySVM::getCost(const vector<Vector2d<float> > &trainingFeatures){
+//   std::map<std::string, svm::SVM_TYPE> svmMap;
+
+//   svmMap["C_SVC"]=svm::C_SVC;
+//   svmMap["nu_SVC"]=svm::nu_SVC;
+//   svmMap["one_class"]=svm::one_class;
+//   svmMap["epsilon_SVR"]=svm::epsilon_SVR;
+//   svmMap["nu_SVR"]=svm::nu_SVR;
+
+//   std::map<std::string, svm::KERNEL_TYPE> kernelMap;
+
+//   kernelMap["linear"]=svm::linear;
+//   kernelMap["polynomial"]=svm::polynomial;
+//   kernelMap["radial"]=svm::radial;
+//   kernelMap["sigmoid;"]=svm::sigmoid;
+
+//   unsigned short nclass=trainingFeatures.size();
+//   unsigned int ntraining=0;
+//   unsigned int ntest=0;
+//   for(int iclass=0;iclass<nclass;++iclass){
+//     ntraining+=m_nctraining[iclass];
+//     ntest+=m_nctest[iclass];
+//   }
+//   if(ntest)
+//     assert(!m_cv);
+//   if(!m_cv)
+//     assert(ntest);
+//   unsigned short nFeatures=trainingFeatures[0][0].size();
+
+//   struct svm_parameter param;
+//   param.svm_type = svmMap[m_svm_type];
+//   param.kernel_type = kernelMap[m_kernel_type];
+//   param.degree = m_kernel_degree;
+//   param.gamma = (m_gamma>0)? m_gamma : 1.0/nFeatures;
+//   param.coef0 = m_coef0;
+//   param.nu = m_nu;
+//   param.cache_size = m_cache;
+//   param.C = m_ccost;
+//   param.eps = m_epsilon_tol;
+//   param.p = m_epsilon_loss;
+//   param.shrinking = (m_shrinking)? 1 : 0;
+//   param.probability = (m_prob_est)? 1 : 0;
+//   param.nr_weight = 0;//not used: I use priors and balancing
+//   param.weight_label = NULL;
+//   param.weight = NULL;
+//   param.verbose=(m_verbose>1)? true:false;
+//   struct svm_model* svm;
+//   struct svm_problem prob;
+//   struct svm_node* x_space;
+
+//   prob.l=ntraining;
+//   prob.y = Malloc(double,prob.l);
+//   prob.x = Malloc(struct svm_node *,prob.l);
+//   x_space = Malloc(struct svm_node,(nFeatures+1)*ntraining);
+//   unsigned long int spaceIndex=0;
+//   int lIndex=0;
+//   for(int iclass=0;iclass<nclass;++iclass){
+//     // for(int isample=0;isample<trainingFeatures[iclass].size();++isample){
+//     for(int isample=0;isample<m_nctraining[iclass];++isample){
+//       prob.x[lIndex]=&(x_space[spaceIndex]);
+//       for(int ifeature=0;ifeature<nFeatures;++ifeature){
+//         x_space[spaceIndex].index=ifeature+1;
+//         x_space[spaceIndex].value=trainingFeatures[iclass][isample][ifeature];
+//         ++spaceIndex;
+//       }
+//       x_space[spaceIndex++].index=-1;
+//       prob.y[lIndex]=iclass;
+//       ++lIndex;
+//     }
+//   }
+
+//   assert(lIndex==prob.l);
+//   if(m_verbose>2)
+//     std::cout << "checking parameters" << std::endl;
+//   svm_check_parameter(&prob,&param);
+//   if(m_verbose>2)
+//     std::cout << "parameters ok, training" << std::endl;
+//   svm=svm_train(&prob,&param);
+//   if(m_verbose>2)
+//     std::cout << "SVM is now trained" << std::endl;
+
+//   m_cm.clearResults();
+//   if(m_cv>1){
+//     double *target = Malloc(double,prob.l);
+//     svm_cross_validation(&prob,&param,m_cv,target);
+//     assert(param.svm_type != EPSILON_SVR && param.svm_type != NU_SVR);//only for regression
+//     for(int i=0;i<prob.l;i++){
+//       string refClassName=m_nameVector[prob.y[i]];
+//       string className=m_nameVector[target[i]];
+//       if(m_classValueMap.size())
+//         m_cm.incrementResult(type2string<short>(m_classValueMap[refClassName]),type2string<short>(m_classValueMap[className]),1.0);
+//       else
+//         m_cm.incrementResult(m_cm.getClass(prob.y[i]),m_cm.getClass(target[i]),1.0);
+//     }
+//     free(target);
+//   }
+//   else{
+//     struct svm_node *x_test;
+//     vector<double> result(nclass);
+//     x_test = Malloc(struct svm_node,(nFeatures+1));
+//     for(int iclass=0;iclass<nclass;++iclass){
+//       for(int isample=0;isample<m_nctest[iclass];++isample){
+//         for(int ifeature=0;ifeature<nFeatures;++ifeature){
+//           x_test[ifeature].index=ifeature+1;
+//           x_test[ifeature].value=trainingFeatures[iclass][m_nctraining[iclass]+isample][ifeature];
+//         }
+//         x_test[nFeatures].index=-1;
+//         double predict_label=0;
+//         assert(svm_check_probability_model(svm));
+//         predict_label = svm_predict_probability(svm,x_test,&(result[0]));
+//         // predict_label = svm_predict(svm,x_test);
+//         string refClassName=m_nameVector[iclass];
+//         string className=m_nameVector[static_cast<short>(predict_label)];
+//         if(m_classValueMap.size())
+//           m_cm.incrementResult(type2string<short>(m_classValueMap[refClassName]),type2string<short>(m_classValueMap[className]),1.0);
+//         else
+//           m_cm.incrementResult(refClassName,className,1.0);
+//       }
+//     }
+//     free(x_test);
+//   }
+//   if(m_verbose>1)
+//     std::cout << m_cm << std::endl;
+//   assert(m_cm.nReference());
+//   // if(m_verbose)
+
+//   //   std::cout << m_cm << std::endl;
+//   //   std::cout << "Kappa: " << m_cm.kappa() << std::endl;
+//   //   double se95_oa=0;
+//   //   double doa=0;
+//   //   doa=m_cm.oa_pct(&se95_oa);
+//   //   std::cout << "Overall Accuracy: " << doa << " (" << se95_oa << ")" << std::endl;
+
+//   // *NOTE* Because svm_model contains pointers to svm_problem, you can
+//   // not free the memory used by svm_problem if you are still using the
+//   // svm_model produced by svm_train().
+//   // however, we will re-train the svm later on after the feature selection
+//   free(prob.y);
+//   free(prob.x);
+//   free(x_space);
+//   svm_free_and_destroy_model(&(svm));
+
+//   return(m_cm.kappa());
+// }

 int main(int argc, char *argv[])
 {
@@ -208,11 +200,11 @@ int main(int argc, char *argv[])
   Optionpk<string> input_opt("i", "input", "input test set (leave empty to perform a cross validation based on training only)");
   Optionpk<string> training_opt("t", "training", "training vector file. A single vector file contains all training features (must be set as: B0, B1, B2,...) for all classes (class numbers identified by label option).");
   Optionpk<string> tlayer_opt("tln", "tln", "training layer name(s)");
-  Optionpk<string> label_opt("\0", "label", "identifier for class label in training vector file.","label");
+  Optionpk<string> label_opt("label", "label", "identifier for class label in training vector file.","label");
   Optionpk<unsigned short> maxFeatures_opt("n", "nf", "number of features to select (0 to select optimal number, see also ecost option)", 0);
-  Optionpk<unsigned int> balance_opt("\0", "balance", "balance the input data to this number of samples for each class", 0);
+  Optionpk<unsigned int> balance_opt("bal", "balance", "balance the input data to this number of samples for each class", 0);
   Optionpk<bool> random_opt("random","random", "in case of balance, randomize input data", true);
-  Optionpk<int> minSize_opt("m", "min", "if number of training pixels is less then min, do not take this class into account", 0);
+  Optionpk<int> minSize_opt("min", "min", "if number of training pixels is less then min, do not take this class into account", 0);
   Optionpk<double> start_opt("s", "start", "start band sequence number",0);
   Optionpk<double> end_opt("e", "end", "end band sequence number (set to 0 to include all bands)", 0);
   Optionpk<short> band_opt("b", "band", "band index (starting from 0, either use band option or use start to end)");
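With the implementation moved out of pkfssvm.cc into the shared library, both pkfssvm and pkoptsvm now obtain the SVM cost function from algorithms/CostFactorySVM. A sketch of how a caller drives the class, with the constructor signature and argument order taken from the commented-out copy above; the surrounding function and variables are hypothetical placeholders, and the parameter values are the defaults from the old default constructor:

    #include <vector>
    #include "base/Vector2d.h"
    #include "algorithms/CostFactorySVM.h"

    double evaluateSelection(const std::vector< Vector2d<float> > &trainingFeatures,
                             const std::vector<unsigned int> &nctraining,
                             const std::vector<unsigned int> &nctest)
    {
      // cv==0 here because per-class test counts are supplied; the getCost
      // logic above uses cross validation only when nctest is all zero
      unsigned short cv=0;
      bool verbose=false;
      // (svm_type, kernel_type, degree, gamma, coef0, ccost, nu,
      //  epsilon_loss, cache, epsilon_tol, shrinking, prob_est, cv, verbose)
      CostFactorySVM costfactory("C_SVC", "radial", 3, 1.0, 0, 1000, 0.5,
                                 100, 100, 0.001, false, true, cv, verbose);
      costfactory.setNcTraining(nctraining); // training samples per class
      costfactory.setNcTest(nctest);         // held-out samples per class
      // trains libsvm on the selected features and returns the kappa statistic
      return costfactory.getCost(trainingFeatures);
    }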
diff --git a/src/apps/pkfssvm.h b/src/apps/pkfssvm.h
deleted file mode 100644
index c4d3f72..0000000
--- a/src/apps/pkfssvm.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/**********************************************************************
-pkfssvm.h: feature selection for svm classifier
-Copyright (C) 2008-2014 Pieter Kempeneers
-
-This file is part of pktools
-
-pktools is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-pktools is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with pktools. If not, see <http://www.gnu.org/licenses/>.
-***********************************************************************/
-#include <string>
-#include <vector>
-#include "base/Vector2d.h"
-
-#ifndef _PKFSSVMH_H_
-#define _PKFSSVMH_H_
-namespace svm{
-  enum SVM_TYPE {C_SVC=0, nu_SVC=1,one_class=2, epsilon_SVR=3, nu_SVR=4};
-  enum KERNEL_TYPE {linear=0,polynomial=1,radial=2,sigmoid=3};
-}
-
-enum SelectorValue { NA=0, SFFS=1, SFS=2, SBS=3, BFS=4};
-
-class CostFactorySVM : public CostFactory
-{
-public:
-CostFactorySVM();
-CostFactorySVM(std::string svm_type, std::string kernel_type, unsigned short kernel_degree, float gamma, float coef0, float ccost, float nu, float epsilon_loss, int cache, float epsilon_tol, bool shrinking, bool prob_est, unsigned short cv, bool verbose);
-~CostFactorySVM();
-double getCost(const std::vector<Vector2d<float> > &trainingFeatures);
-
-private:
-std::string m_svm_type;
-std::string m_kernel_type;
-unsigned short m_kernel_degree;
-float m_gamma;
-float m_coef0;
-float m_ccost;
-float m_nu;
-float m_epsilon_loss;
-int m_cache;
-float m_epsilon_tol;
-bool m_shrinking;
-bool m_prob_est;
-};
-
-
-#endif

diff --git a/src/apps/pkoptsvm.cc b/src/apps/pkoptsvm.cc
index ffcc488..83fd8cf 100644
--- a/src/apps/pkoptsvm.cc
+++ b/src/apps/pkoptsvm.cc
@@ -28,6 +28,7 @@ along with pktools. If not, see <http://www.gnu.org/licenses/>.
 #include "algorithms/ConfusionMatrix.h"
 #include "algorithms/FeatureSelector.h"
 #include "algorithms/OptFactory.h"
+#include "algorithms/CostFactorySVM.h"
 #include "algorithms/svm.h"
 #include "imageclasses/ImgReaderOgr.h"

@@ -35,11 +36,6 @@ along with pktools. If not, see <http://www.gnu.org/licenses/>.
 #include <config.h>
 #endif

-namespace svm{
-  enum SVM_TYPE {C_SVC=0, nu_SVC=1,one_class=2, epsilon_SVR=3, nu_SVR=4};
-  enum KERNEL_TYPE {linear=0,polynomial=1,radial=2,sigmoid=3};
-}
-
 using namespace std;

 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
@@ -70,21 +66,6 @@ Optionpk<short> verbose_opt("v", "verbose", "use 1 to output intermediate result

 double objFunction(const std::vector<double> &x, std::vector<double> &grad, void *my_func_data){

-  std::map<std::string, svm::SVM_TYPE> svmMap;
-
-  svmMap["C_SVC"]=svm::C_SVC;
-  svmMap["nu_SVC"]=svm::nu_SVC;
-  svmMap["one_class"]=svm::one_class;
-  svmMap["epsilon_SVR"]=svm::epsilon_SVR;
-  svmMap["nu_SVR"]=svm::nu_SVR;
-
-  std::map<std::string, svm::KERNEL_TYPE> kernelMap;
-
-  kernelMap["linear"]=svm::linear;
-  kernelMap["polynomial"]=svm::polynomial;
-  kernelMap["radial"]=svm::radial;
-  kernelMap["sigmoid;"]=svm::sigmoid;
-
   assert(grad.empty());
   vector<Vector2d<float> > *tf=reinterpret_cast<vector<Vector2d<float> >*> (my_func_data);
   float ccost=x[0];
@@ -92,138 +73,185 @@ double objFunction(const std::vector<double> &x, std::vector<double> &grad, void
   double error=1.0/epsilon_tol_opt[0];
   double kappa=1.0;
   double oa=1.0;
-  //todo: calculate kappa using cross validation
-  unsigned short nclass=tf->size();
-  unsigned int ntraining=0;
-  unsigned int ntest=0;
-  for(int iclass=0;iclass<nclass;++iclass){
-    ntraining+=nctraining[iclass];
-    ntest+=nctest[iclass];
-  }
-  if(ntest)
-    cv_opt[0]=0;
-  if(!cv_opt[0])
-    assert(ntest);
-  // ntraining+=(*tf)[iclass].size();
-  unsigned short nFeatures=(*tf)[0][0].size();
-  struct svm_parameter param;
-  param.svm_type = svmMap[svm_type_opt[0]];
-  param.kernel_type = kernelMap[kernel_type_opt[0]];
-  param.degree = kernel_degree_opt[0];
-  param.gamma = gamma;
-  param.coef0 = coef0_opt[0];
-  param.nu = nu_opt[0];
-  param.cache_size = cache_opt[0];
-  param.C = ccost;
-  param.eps = epsilon_tol_opt[0];
-  param.p = epsilon_loss_opt[0];
-  param.shrinking = (shrinking_opt[0])? 1 : 0;
-  param.probability = (prob_est_opt[0])? 1 : 0;
-  param.nr_weight = 0;//not used: I use priors and balancing
-  param.weight_label = NULL;
-  param.weight = NULL;
-  param.verbose=(verbose_opt[0]>2)? true:false;
-  struct svm_model* svm;
-  struct svm_problem prob;
-  struct svm_node* x_space;
-  prob.l=ntraining;
-  prob.y = Malloc(double,prob.l);
-  prob.x = Malloc(struct svm_node *,prob.l);
-  x_space = Malloc(struct svm_node,(nFeatures+1)*ntraining);
-  unsigned long int spaceIndex=0;
-  int lIndex=0;
-  for(int iclass=0;iclass<nclass;++iclass){
-    // for(int isample=0;isample<(*tf)[iclass].size();++isample){
-    for(int isample=0;isample<nctraining[iclass];++isample){
-      prob.x[lIndex]=&(x_space[spaceIndex]);
-      for(int ifeature=0;ifeature<nFeatures;++ifeature){
-        x_space[spaceIndex].index=ifeature+1;
-        x_space[spaceIndex].value=(*tf)[iclass][isample][ifeature];
-        ++spaceIndex;
-      }
-      x_space[spaceIndex++].index=-1;
-      prob.y[lIndex]=iclass;
-      ++lIndex;
-    }
-  }
-  assert(lIndex==prob.l);
-  if(verbose_opt[0]>2)
-    std::cout << "checking parameters" << std::endl;
-  svm_check_parameter(&prob,&param);
-  if(verbose_opt[0]>2)
-    std::cout << "parameters ok, training" << std::endl;
-  svm=svm_train(&prob,&param);
-  if(verbose_opt[0]>2)
-    std::cout << "SVM is now trained" << std::endl;
-
-  ConfusionMatrix cm;
+  CostFactorySVM costfactory(svm_type_opt[0], kernel_type_opt[0], kernel_degree_opt[0], gamma, coef0_opt[0], ccost, nu_opt[0], epsilon_loss_opt[0], cache_opt[0], epsilon_tol_opt[0], shrinking_opt[0], prob_est_opt[0], cv_opt[0], verbose_opt[0]);
+
+  assert(tf->size());
+  // if(nctest>0)
+  //   costfactory.setCv(0);
+
+  costfactory.setCv(cv_opt[0]);
+
+  if(classname_opt.size()){
+    assert(classname_opt.size()==classvalue_opt.size());
+    for(int iclass=0;iclass<classname_opt.size();++iclass)
+      costfactory.setClassValueMap(classname_opt[iclass],classvalue_opt[iclass]);
+  }
   //set names in confusion matrix using nameVector
+  costfactory.setNameVector(nameVector);
+  // vector<string> nameVector=costfactory.getNameVector();
   for(int iname=0;iname<nameVector.size();++iname){
-    if(classValueMap.empty())
-      cm.pushBackClassName(nameVector[iname]);
-    else if(cm.getClassIndex(type2string<short>(classValueMap[nameVector[iname]]))<0)
-      cm.pushBackClassName(type2string<short>(classValueMap[nameVector[iname]]));
-  }
-  if(cv_opt[0]>1){
-    double *target = Malloc(double,prob.l);
-    svm_cross_validation(&prob,&param,cv_opt[0],target);
-    assert(param.svm_type != EPSILON_SVR && param.svm_type != NU_SVR);//only for regression
-    for(int i=0;i<prob.l;i++){
-      string refClassName=nameVector[prob.y[i]];
-      string className=nameVector[target[i]];
-      if(classValueMap.size())
-        cm.incrementResult(type2string<short>(classValueMap[refClassName]),type2string<short>(classValueMap[className]),1.0);
-      else
-        cm.incrementResult(cm.getClass(prob.y[i]),cm.getClass(target[i]),1.0);
-    }
-    free(target);
-  }
-  else{
-    struct svm_node *x_test;
-    x_test = Malloc(struct svm_node,(nFeatures+1));
-    for(int iclass=0;iclass<nclass;++iclass){
-      for(int isample=0;isample<nctest[iclass];++isample){
-        for(int ifeature=0;ifeature<nFeatures;++ifeature){
-          x_test[ifeature].index=ifeature+1;
-          x_test[ifeature].value=(*tf)[iclass][nctraining[iclass]+isample][ifeature];
-        }
-        x_test[nFeatures].index=-1;
-        double predict_label=0;
-        //todo: make distinction between svm_predict and svm_predict_probability?
-        predict_label = svm_predict(svm,x_test);
-        string refClassName=nameVector[iclass];
-        string className=nameVector[static_cast<short>(predict_label)];
-        if(classValueMap.size())
-          cm.incrementResult(type2string<short>(classValueMap[refClassName]),type2string<short>(classValueMap[className]),1.0);
-        else
-          cm.incrementResult(refClassName,className,1.0);
-      }
+    if(costfactory.getClassValueMap().empty()){
+      costfactory.pushBackClassName(nameVector[iname]);
+      // cm.pushBackClassName(nameVector[iname]);
     }
-    free(x_test);
-  }
-  if(verbose_opt[0]>1)
-    std::cout << cm << std::endl;
-  assert(cm.nReference());
-  free(prob.y);
-  free(prob.x);
-  free(x_space);
-  svm_free_and_destroy_model(&(svm));
-  if(verbose_opt[0]>2)
-    std::cout << cm << std::endl;
-  kappa=cm.kappa();
-  oa=cm.oa();
-  if(verbose_opt[0]>1){
-    std::cout << " --ccost " << x[0];
-    std::cout << " --gamma " << x[1];
-    std::cout << std::endl;
-    std::cout << "oa: " << oa << std::endl;
-    std::cout << "kappa: " << kappa << std::endl;
+    else if(costfactory.getClassIndex(type2string<short>((costfactory.getClassValueMap())[nameVector[iname]]))<0)
+      costfactory.pushBackClassName(type2string<short>((costfactory.getClassValueMap())[nameVector[iname]]));
   }
-  double cost=(costfunction_opt[0])? oa : kappa;
-  if(cost>0)
-    error=1.0/cost;
-  return(error);
+
+  costfactory.setNcTraining(nctraining);
+  costfactory.setNcTest(nctest);
+
+  kappa=costfactory.getCost(*tf);
+  return(kappa);
+
+  // std::map<std::string, svm::SVM_TYPE> svmMap;
+
+  // svmMap["C_SVC"]=svm::C_SVC;
+  // svmMap["nu_SVC"]=svm::nu_SVC;
+  // svmMap["one_class"]=svm::one_class;
+  // svmMap["epsilon_SVR"]=svm::epsilon_SVR;
+  // svmMap["nu_SVR"]=svm::nu_SVR;
+
+  // std::map<std::string, svm::KERNEL_TYPE> kernelMap;
+
+  // kernelMap["linear"]=svm::linear;
+  // kernelMap["polynomial"]=svm::polynomial;
+  // kernelMap["radial"]=svm::radial;
+  // kernelMap["sigmoid;"]=svm::sigmoid;
+
+  // unsigned short nclass=tf->size();
+  // unsigned int ntraining=0;
+  // unsigned int ntest=0;
+  // for(int iclass=0;iclass<nclass;++iclass){
+  //   ntraining+=nctraining[iclass];
+  //   ntest+=nctest[iclass];
+  // }
+  // if(ntest)
+  //   cv_opt[0]=0;
+  // if(!cv_opt[0])
+  //   assert(ntest);
+
+  // unsigned short nFeatures=(*tf)[0][0].size();
+  // struct svm_parameter param;
+  // param.svm_type = svmMap[svm_type_opt[0]];
+  // param.kernel_type = kernelMap[kernel_type_opt[0]];
+  // param.degree = kernel_degree_opt[0];
+  // param.gamma = gamma;
+  // param.coef0 = coef0_opt[0];
+  // param.nu = nu_opt[0];
+  // param.cache_size = cache_opt[0];
+  // param.C = ccost;
+  // param.eps = epsilon_tol_opt[0];
+  // param.p = epsilon_loss_opt[0];
+  // param.shrinking = (shrinking_opt[0])? 1 : 0;
+  // param.probability = (prob_est_opt[0])? 1 : 0;
+  // param.nr_weight = 0;//not used: I use priors and balancing
+  // param.weight_label = NULL;
+  // param.weight = NULL;
+  // param.verbose=(verbose_opt[0]>2)? true:false;
+  // struct svm_model* svm;
+  // struct svm_problem prob;
+  // struct svm_node* x_space;
+
+  // prob.l=ntraining;
+  // prob.y = Malloc(double,prob.l);
+  // prob.x = Malloc(struct svm_node *,prob.l);
+  // x_space = Malloc(struct svm_node,(nFeatures+1)*ntraining);
+  // unsigned long int spaceIndex=0;
+  // int lIndex=0;
+  // for(int iclass=0;iclass<nclass;++iclass){
+  //   // for(int isample=0;isample<(*tf)[iclass].size();++isample){
+  //   for(int isample=0;isample<nctraining[iclass];++isample){
+  //     prob.x[lIndex]=&(x_space[spaceIndex]);
+  //     for(int ifeature=0;ifeature<nFeatures;++ifeature){
+  //       x_space[spaceIndex].index=ifeature+1;
+  //       x_space[spaceIndex].value=(*tf)[iclass][isample][ifeature];
+  //       ++spaceIndex;
+  //     }
+  //     x_space[spaceIndex++].index=-1;
+  //     prob.y[lIndex]=iclass;
+  //     ++lIndex;
+  //   }
+  // }
+
+  // assert(lIndex==prob.l);
+  // if(verbose_opt[0]>2)
+  //   std::cout << "checking parameters" << std::endl;
+  // svm_check_parameter(&prob,&param);
+  // if(verbose_opt[0]>2)
+  //   std::cout << "parameters ok, training" << std::endl;
+  // svm=svm_train(&prob,&param);
+  // if(verbose_opt[0]>2)
+  //   std::cout << "SVM is now trained" << std::endl;
+
+  // ConfusionMatrix cm;
+  // //set names in confusion matrix using nameVector
+  // for(int iname=0;iname<nameVector.size();++iname){
+  //   if(classValueMap.empty())
+  //     cm.pushBackClassName(nameVector[iname]);
+  //   else if(cm.getClassIndex(type2string<short>(classValueMap[nameVector[iname]]))<0)
+  //     cm.pushBackClassName(type2string<short>(classValueMap[nameVector[iname]]));
+  // }
+  // if(cv_opt[0]>1){
+  //   double *target = Malloc(double,prob.l);
+  //   svm_cross_validation(&prob,&param,cv_opt[0],target);
+  //   assert(param.svm_type != EPSILON_SVR && param.svm_type != NU_SVR);//only for regression
+  //   for(int i=0;i<prob.l;i++){
+  //     string refClassName=nameVector[prob.y[i]];
+  //     string className=nameVector[target[i]];
+  //     if(classValueMap.size())
+  //       cm.incrementResult(type2string<short>(classValueMap[refClassName]),type2string<short>(classValueMap[className]),1.0);
+  //     else
+  //       cm.incrementResult(cm.getClass(prob.y[i]),cm.getClass(target[i]),1.0);
+  //   }
+  //   free(target);
+  // }
+  // else{
+  //   struct svm_node *x_test;
+  //   x_test = Malloc(struct svm_node,(nFeatures+1));
+  //   for(int iclass=0;iclass<nclass;++iclass){
+  //     for(int isample=0;isample<nctest[iclass];++isample){
+  //       for(int ifeature=0;ifeature<nFeatures;++ifeature){
+  //         x_test[ifeature].index=ifeature+1;
+  //         x_test[ifeature].value=(*tf)[iclass][nctraining[iclass]+isample][ifeature];
+  //       }
+  //       x_test[nFeatures].index=-1;
+  //       double predict_label=0;
+  //       //todo: make distinction between svm_predict and svm_predict_probability?
+  //       predict_label = svm_predict(svm,x_test);
+  //       string refClassName=nameVector[iclass];
+  //       string className=nameVector[static_cast<short>(predict_label)];
+  //       if(classValueMap.size())
+  //         cm.incrementResult(type2string<short>(classValueMap[refClassName]),type2string<short>(classValueMap[className]),1.0);
+  //       else
+  //         cm.incrementResult(refClassName,className,1.0);
+  //     }
+  //   }
+  //   free(x_test);
+  // }
+  // if(verbose_opt[0]>1)
+  //   std::cout << cm << std::endl;
+  // assert(cm.nReference());
+  // free(prob.y);
+  // free(prob.x);
+  // free(x_space);
+  // svm_free_and_destroy_model(&(svm));
+  // if(verbose_opt[0]>2)
+  //   std::cout << cm << std::endl;
+  // kappa=cm.kappa();
+  // oa=cm.oa();
+  // if(verbose_opt[0]>1){
+  //   std::cout << " --ccost " << x[0];
+  //   std::cout << " --gamma " << x[1];
+  //   std::cout << std::endl;
+  //   std::cout << "oa: " << oa << std::endl;
+  //   std::cout << "kappa: " << kappa << std::endl;
+  // }
+  // double cost=(costfunction_opt[0])? oa : kappa;
+  // if(cost>0)
+  //   error=1.0/cost;
+  // return(error);
 }
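The net effect of the rewritten objFunction is a sign flip: it used to return an error term (1/kappa or 1/oa) to be minimized, and it now returns kappa directly, which is why the hunks below keep a running maximum in the GRID search and switch nlopt from set_min_objective to set_max_objective. A self-contained sketch of the maximization pattern against the nlopt C++ API (the optimizer choice, bounds, starting point and tolerance are illustrative, not the pkoptsvm defaults):

    #include <nlopt.hpp>
    #include <vector>

    // objective with the nlopt vfunc signature: returns the value to
    // maximize (kappa) for x = (ccost, gamma); grad stays empty for the
    // derivative-free algorithms used here
    double objFunction(const std::vector<double> &x, std::vector<double> &grad,
                       void *my_func_data);

    void tune(void *trainingData){
      nlopt::opt optimizer(nlopt::LN_COBYLA, 2);   // 2 parameters: ccost, gamma
      optimizer.set_max_objective(objFunction, trainingData);
      std::vector<double> lb(2), ub(2), x(2);
      lb[0]=1;    ub[0]=10000; x[0]=1000;          // ccost bounds and start
      lb[1]=1e-4; ub[1]=10;    x[1]=1;             // gamma bounds and start
      optimizer.set_lower_bounds(lb);
      optimizer.set_upper_bounds(ub);
      optimizer.set_xtol_rel(1e-4);                // illustrative stopping rule
      double maxKappa=0;
      optimizer.optimize(x, maxKappa);             // x ends at the best pair
    }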

 int main(int argc, char *argv[])
@@ -231,13 +259,13 @@ int main(int argc, char *argv[])
   map<short,int> reclassMap;
   vector<int> vreclass;
   Optionpk<string> training_opt("t", "training", "training vector file. A single vector file contains all training features (must be set as: b0, b1, b2,...) for all classes (class numbers identified by label option).");
-  Optionpk<string> input_opt("i", "input", "input test vectro file");
+  Optionpk<string> input_opt("i", "input", "input test vector file");
   Optionpk<string> tlayer_opt("tln", "tln", "training layer name(s)");
-  Optionpk<string> label_opt("\0", "label", "identifier for class label in training vector file.","label");
+  Optionpk<string> label_opt("label", "label", "identifier for class label in training vector file.","label");
   // Optionpk<unsigned short> reclass_opt("\0", "rc", "reclass code (e.g. --rc=12 --rc=23 to reclass first two classes to 12 and 23 resp.).", 0);
-  Optionpk<unsigned int> balance_opt("\0", "balance", "balance the input data to this number of samples for each class", 0);
+  Optionpk<unsigned int> balance_opt("bal", "balance", "balance the input data to this number of samples for each class", 0);
   Optionpk<bool> random_opt("random","random", "in case of balance, randomize input data", true);
-  Optionpk<int> minSize_opt("m", "min", "if number of training pixels is less then min, do not take this class into account", 0);
+  Optionpk<int> minSize_opt("min", "min", "if number of training pixels is less then min, do not take this class into account", 0);
   Optionpk<double> start_opt("s", "start", "start band sequence number",0);
   Optionpk<double> end_opt("e", "end", "end band sequence number (set to 0 to include all bands)", 0);
   Optionpk<short> band_opt("b", "band", "band index (starting from 0, either use band option or use start to end)");
@@ -374,6 +402,9 @@ int main(int argc, char *argv[])
     trainingReader.close();
   }
   if(trainingMap.size()<2){
+    // map<string,Vector2d<float> >::iterator mapit=trainingMap.begin();
+    // while(mapit!=trainingMap.end())
+    //   cerr << mapit->first << " -> " << classValueMap[mapit->first] << std::endl;
     string errorstring="Error: could not read at least two classes from training input file";
     throw(errorstring);
   }
@@ -596,9 +627,12 @@ int main(int argc, char *argv[])
   if(algorithm_opt[0]=="GRID"){
     if(step_opt.size()<2)//[0] for cost, [1] for gamma
       step_opt.push_back(step_opt.back());
-    double minError=1000;
-    double minCost=0;
-    double minGamma=0;
+    // double minError=1000;
+    // double minCost=0;
+    // double minGamma=0;
+    double maxKappa=0;
+    double maxCost=0;
+    double maxGamma=0;
     const char* pszMessage;
     void* pProgressArg=NULL;
     GDALProgressFunc pfnProgress=GDALTermProgress;
@@ -612,15 +646,15 @@ int main(int argc, char *argv[])
         x[0]=ccost;
         x[1]=gamma;
         std::vector<double> theGrad;
-        double error=0;
-        error=objFunction(x,theGrad,&trainingFeatures);
-        if(error<minError){
-          minError=error;
-          minCost=ccost;
-          minGamma=gamma;
+        double kappa=0;
+        kappa=objFunction(x,theGrad,&trainingFeatures);
+        if(kappa>maxKappa){
+          maxKappa=kappa;
+          maxCost=ccost;
+          maxGamma=gamma;
         }
         if(verbose_opt[0])
-          std::cout << ccost << " " << gamma << " " << error<< std::endl;
+          std::cout << ccost << " " << gamma << " " << kappa<< std::endl;
         progress+=1.0/ncost/ngamma;
         if(!verbose_opt[0])
           pfnProgress(progress,pszMessage,pProgressArg);
@@ -629,8 +663,8 @@ int main(int argc, char *argv[])
     progress=1.0;
     if(!verbose_opt[0])
       pfnProgress(progress,pszMessage,pProgressArg);
-    x[0]=minCost;
-    x[1]=minGamma;
+    x[0]=maxCost;
+    x[1]=maxGamma;
   }
   else{
     nlopt::opt optimizer=OptFactory::getOptimizer(algorithm_opt[0],2);
@@ -646,7 +680,8 @@ int main(int argc, char *argv[])
     init[1]=(gamma_opt[2]>0)? gamma_opt[1] : 1.0/trainingFeatures[0][0].size();
     ub[0]=ccost_opt[1];
     ub[1]=(gamma_opt[1]>0)? gamma_opt[1] : 1.0/trainingFeatures[0][0].size();
-    optimizer.set_min_objective(objFunction, &trainingFeatures);
+    // optimizer.set_min_objective(objFunction, &trainingFeatures);
+    optimizer.set_max_objective(objFunction, &trainingFeatures);
     optimizer.set_lower_bounds(lb);
     optimizer.set_upper_bounds(ub);
     if(verbose_opt[0]>1)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-grass/pktools.git

_______________________________________________
Pkg-grass-devel mailing list
Pkg-grass-devel@lists.alioth.debian.org
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-grass-devel