From 85a4962556b67d1cc0668ecb2fbb03b3b4dd6e7e Mon Sep 17 00:00:00 2001
From: Joe Zhao
Date: Sat, 11 Apr 2015 17:46:19 +0800
Subject: completed & tested, train & predict

---
 main.cpp                 |  20 ++++++--
 model/ranksvm.cpp        |   4 +-
 model/ranksvm.h          |   2 +-
 model/ranksvmtn.cpp      | 117 +++++++++++++++++++++++------------------------
 model/ranksvmtn.h        |   2 +-
 tools/dataProvider.h     |  11 ++---
 tools/fileDataProvider.h |  11 ++++-
 7 files changed, 93 insertions(+), 74 deletions(-)

diff --git a/main.cpp b/main.cpp
index 1cb18b9..e89cfe1 100644
--- a/main.cpp
+++ b/main.cpp
@@ -6,6 +6,7 @@
 #include "model/ranksvmtn.h"
 #include "tools/fileDataProvider.h"
 #include "tools/matrixIO.h"
+#include <fstream>
 
 INITIALIZE_EASYLOGGINGPP
 
@@ -25,6 +26,7 @@ int train()
 {
     LOG(INFO)<<"Training started";
     dp.getDataSet(D);
+    LOG(INFO)<<"Read "<<D.getSize();
     rsvm->train(D);
     LOG(INFO)<<"Training finished,saving model";
@@ -39,15 +41,27 @@ int predict()
 {
     RSVM *rsvm;
     rsvm = RSVM::loadModel(vm["model"].as<std::string>().c_str());
     FileDP dp(vm["feature"].as<std::string>().c_str());
+
+    dp.open();
     DataList D;
-    std::list<double> L;
+    std::vector<double> L;
+    LOG(INFO)<<"Prediction started";
+    while (!dp.EOFile()) {
         dp.getDataSet(D);
+        LOG(INFO)<<"Read "<<D.getSize();
         rsvm->predict(D,L);
     }
-    // TODO output
     Eigen::write_stream(std::cout, L);
+    LOG(INFO)<<"Training finished,saving prediction";
+    std::ofstream fout(vm["output"].as<std::string>().c_str());
+
+    for (int i=0; i<
 (), "set input model file")
-    ("output,o", po::value<std::string>(), "set output model file")
+    ("output,o", po::value<std::string>(), "set output model/prediction file")
     ("feature,i", po::value<std::string>(), "set input feature file");
 
     // Parsing program options
diff --git a/model/ranksvm.cpp b/model/ranksvm.cpp
index dc2ad9f..7ee72ac 100644
--- a/model/ranksvm.cpp
+++ b/model/ranksvm.cpp
@@ -40,8 +40,8 @@ RSVM* RSVM::loadModel(const string fname){
 }
 
 int RSVM::setModel(const SVMModel &model) {
-    if (model.weight.cols()!=fsize)
-        LOG(FATAL) << "Feature size mismatch: "<
     this->model.weight=model.weight;
     this->model.beta=model.beta;
     return 0;
diff --git a/model/ranksvm.h b/model/ranksvm.h
index e82b6be..aa5e1ca 100644
--- a/model/ranksvm.h
+++ b/model/ranksvm.h
@@ -26,7 +26,7 @@ protected:
     int fsize;
 public:
     virtual int train(DataList &D)=0;
-    virtual int predict(DataList &D,std::list<double> &res)=0;
+    virtual int predict(DataList &D,std::vector<double> &res)=0;
     // TODO Not sure how to construct this
     // Possible solution: generate a nxn matrix each row contains the sorted list of ranker result.
     int saveModel(const std::string fname);
diff --git a/model/ranksvmtn.cpp b/model/ranksvmtn.cpp
index 776d4db..959ea7d 100644
--- a/model/ranksvmtn.cpp
+++ b/model/ranksvmtn.cpp
@@ -7,7 +7,8 @@ using namespace std;
 using namespace Eigen;
 
 const int maxiter = 10;
-const double prec=1e-3;
+const double prec=1e-4;
+const double C=1;
 
 int cg_solve(const MatrixXd &A, const VectorXd &b, VectorXd &x)
 {
@@ -20,9 +21,7 @@ int cg_solve(const MatrixXd &A, const VectorXd &b, VectorXd &x)
     {
         // Non preconditioned version
         r_1 = res.dot(res);
-        cout<
 0?pred(i):0;
     obj = (pred.cwiseProduct(pred)*C).sum()/2 + w.dot(w)/2;
-    grad = w - (((pred*C).transpose()*A)*w).transpose();
-    for (int i=0;i0)
-        sv(i,i)=1;
-    else
-        sv(i,i)=0;
+    grad = w - (((pred*C).transpose()*A)*D).transpose();
     return 0;
 }
@@ -63,36 +55,40 @@ int line_search(const VectorXd &w,const MatrixXd &D,const MatrixXd &A,const Vect
     double g,h;
     t = 0;
     VectorXd Xd=A*(D*step);
+    VectorXd pred2;
     while (1)
     {
-        pred = pred - t*Xd;
+        pred2 = pred - t*Xd;
         g=wd+t*dd;
         h=dd;
-        for (int i=0;i0) {
-            g += pred(i)*Xd(i);
-            h += Xd(i)*Xd(i);
+        for (int i=0;i0) {
+            g -= C*pred2(i)*Xd(i);
+            h += C*Xd(i)*Xd(i);
         }
+        g=g+1e-12;
+        h=h+1e-12;
+        t=t-g/h;
+        cout<0)
-        H = H + 2*C*A.row(i).transpose()*A.row(i);
+        if (pred(i)>0) {
+            VectorXd v = A.row(i)*D;
+            H = H + C * (v * v.transpose());
+        }
     // Solve
+    //cout<0)
+        ++sv;
     // When dec is small enough
-    if (-step.dot(grad) < prec * obj)
+    LOG(INFO)<<"Iter: "<
-    list<DataEntry*>::iterator iter,st,nx;
-    for (iter= D.getData().begin();ifeature(j);
-    nx=st=iter= D.getData().begin();
-    ++nx;
+    int i,j;
+    LOG(INFO)<<"Processing input";
+    for (i=0;ifeature(j);
+    }
     int cnt=0;
-    while (iter!=D.getData().end())
+    i=j=0;
+    while (iqid!=(*nx)->qid)
+        if ((i+1 == D.getSize())|| D.getData()[i]->qid!=D.getData()[i+1]->qid)
         {
-            list<DataEntry*>::iterator high,low=iter;
-            for (high=st;((*high)->rank)>0;++high)
-                for (low=iter;((*low)->rank)<0;--low)
-                    ++cnt;
-            st = nx;
+            int high=j;
+            while (D.getData()[high]->rank>0)
+                ++high;
+            cnt += (high-j)*(i-high+1);
+            j = i+1;
         }
-        ++iter;
+        ++i;
     }
     A.resize(cnt,D.getSize());
-    nx=st=iter= D.getData().begin();
-    ++nx;
     cnt=i=j=0;
-    while (iter!=D.getData().end())
+    while (iqid!=(*nx)->qid)
+        if ((i+1 == D.getSize())|| D.getData()[i]->qid!=D.getData()[i+1]->qid)
         {
             int v1=j,v2;
-            list<DataEntry*>::iterator high,low=iter;
-            for (high=st;((*high)->rank)>0;++high,++v1)
-                for (low=iter,v2=i;((*low)->rank)<0;--low,--v2) {
+            for (v1=j;(D.getData()[v1]->rank)>0;++v1)
+                for (v2=i;(D.getData()[v2]->rank)<0;--v2) {
                 A(cnt,v1) = 1;
                 A(cnt,v2) = -1;
                 ++cnt;
             }
-            st = nx;
-            j=i+1;
+            j = i+1;
         }
         ++i;
-        ++iter;
     }
     train_orig(fsize,Data,A,model.weight);
     return 0;
 };
 
-int RSVMTN::predict(DataList &D, list<double> &res){
+int RSVMTN::predict(DataList &D, vector<double> &res){
     //TODO define A
-    for (list<DataEntry*>::iterator i=D.getData().begin(), end=D.getData().end();i!=end;++i)
-        res.push_back(((*i)->feature).dot(model.weight));
+    res.clear();
+    for (int i=0;ifeature).dot(model.weight));
+    return 0;
 };
\ No newline at end of file
diff --git a/model/ranksvmtn.h b/model/ranksvmtn.h
index 6ed6ad7..fd99d19 100644
--- a/model/ranksvmtn.h
+++ b/model/ranksvmtn.h
@@ -13,7 +13,7 @@ public:
         return "TN";
     };
     virtual int train(DataList &D);
-    virtual int predict(DataList &D,std::list<double> &res);
+    virtual int predict(DataList &D,std::vector<double> &res);
 };
 
 int cg_solve(const Eigen::MatrixXd &A, const Eigen::VectorXd &b, Eigen::VectorXd &x);
diff --git a/tools/dataProvider.h b/tools/dataProvider.h
index fbf554b..da3e1ee 100644
--- a/tools/dataProvider.h
+++ b/tools/dataProvider.h
@@ -4,7 +4,6 @@
 #include
"../tools/easylogging++.h" #include -#include // TODO decide how to construct training data // One possible way for training data: @@ -25,18 +24,18 @@ typedef struct DataEntry{ class DataList{ private: int n; - std::list data; + std::vector data; public: int getSize(){return data.size();} - void addEntry(DataEntry* d){data.push_front(d);} + void addEntry(DataEntry* d){data.push_back(d);} void setfSize(int fsize){n=fsize;} int getfSize(){return n;} int clear(){ - for (std::list::iterator i=data.begin(),end=data.end();i!=end;++i) - delete *i; + for (int i=0;i getData(){ + std::vector& getData(){ return data; } ~DataList(){ diff --git a/tools/fileDataProvider.h b/tools/fileDataProvider.h index 6ccf28f..942ec9b 100644 --- a/tools/fileDataProvider.h +++ b/tools/fileDataProvider.h @@ -2,6 +2,7 @@ #define FDPROV_H #include "dataProvider.h" +#include "easylogging++.h" #include #include #include @@ -18,11 +19,17 @@ public: DataEntry* e; out.clear(); int fsize; - out.setfSize(fsize); fin>>fsize; + LOG(INFO)<<"Feature size:"<>e->rank; + if (e->rank == 0) + { + delete e; + break; + } fin>>e->qid; e->feature.resize(fsize); for (int i=0;i