diff options
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | model/rankaccu.cpp | 81 | ||||
-rw-r--r-- | model/rankaccu.h | 4 | ||||
-rw-r--r-- | model/ranksvm.h | 2 | ||||
-rw-r--r-- | model/ranksvmtn.cpp | 26 | ||||
-rw-r--r-- | model/ranksvmtn.h | 2 | ||||
-rw-r--r-- | tools/dataProvider.h | 32 | ||||
-rw-r--r-- | train.cpp | 29 |
8 files changed, 84 insertions, 94 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 180456c..0e356fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ INCLUDE_DIRECTORIES ( "/usr/include/eigen3" ) # Use Random Library for Random Number Generation -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fopenmp") FIND_PACKAGE( Boost COMPONENTS program_options REQUIRED ) INCLUDE_DIRECTORIES( ${Boost_INCLUDE_DIR}) diff --git a/model/rankaccu.cpp b/model/rankaccu.cpp index 726f2c8..f998e93 100644 --- a/model/rankaccu.cpp +++ b/model/rankaccu.cpp @@ -70,79 +70,64 @@ void rankmerge(int l,int r,vector<int> &C,vector<int> &rank,const vector<double> } } -void rank_accu(DataList &D,const vector<double> pred) +void rank_accu(RidList &D,const vector<double> pred) { unsigned long n = D.getSize(); vector<int> orig_rank(n),pred_rank(n),C(n); vector<double> orig(n); - vector<DataEntry*> &dat = D.getData(); int i,j; - for (i=0;i<dat.size();++i) + for (i=0;i<D.getSize();++i) { orig_rank[i]=i; pred_rank[i]=i; - orig[i]=dat[i]->rank; + orig[i]=D.getL(i); } - int cnt=0; double accu_nDCG=0; double accu_AP=0; - i=j=0; - while (i<dat.size()) + for (j=0; j<D.getSize();j+=D.getqSize()) { - if ((i+1 == dat.size())|| dat[i]->qid!=dat[i+1]->qid) + i=j+D.getqSize()-1; + double Y=0,Z=0; + double AP=0; + ranksort(j,i,orig_rank,orig,pred); + ranksort(j,i,pred_rank,pred,orig); + for (int k = j;k<=i;++k) { - double Y=0,Z=0; - double AP=0; - ranksort(j,i,orig_rank,orig,pred); - ranksort(j,i,pred_rank,pred,orig); - for (int k = j;k<=i;++k) - { - Z += (pow(2,offset+orig[orig_rank[k]]) - 1)/log2(2+k-j); - Y += (pow(2,offset+orig[pred_rank[k]]) - 1)/log2(2+k-j); - } - accu_nDCG+=Y/Z; - rankmerge(j,i,C,orig_rank,pred,orig); - for (int k = j+1;k<=i;++k) - AP += ((double)C[k])/(k-j); - AP=AP*2/(i-j)-1; - accu_AP+=AP; - j = i+1; - ++cnt; + Z += (pow(2,offset+orig[orig_rank[k]]) - 1)/log2(2+k-j); + Y += (pow(2,offset+orig[pred_rank[k]]) - 1)/log2(2+k-j); } - ++i; + accu_nDCG+=Y/Z; + rankmerge(j,i,C,orig_rank,pred,orig); + for (int k = j+1;k<=i;++k) + AP += ((double)C[k])/(k-j); + AP=AP*2/(i-j)-1; + accu_AP+=AP; } - LOG(INFO)<<"over "<< cnt<< " queries. "<<"Average nDGC: "<< accu_nDCG/cnt<< " Average AP: "<<accu_AP/cnt; + LOG(INFO)<<"over "<< D.getuSize()<< " queries. "<<"Average nDGC: "<< accu_nDCG/D.getuSize()<< " Average AP: "<<accu_AP/D.getuSize(); } -void rank_CMC(DataList &D,const std::vector<double> pred,CMC & cmc) { +void rank_CMC(RidList &D,const std::vector<double> pred,CMC & cmc) { unsigned long n = D.getSize(); vector<int> orig_rank(n),pred_rank(n); vector<double> orig(n); - vector<DataEntry*> &dat = D.getData(); int i,j; - for (i=0;i<dat.size();++i) + for (i=0;i<D.getSize();++i) { orig_rank[i]=i; pred_rank[i]=i; - orig[i]=dat[i]->rank; + orig[i]=D.getL(i); } - int cnt=0; - i=j=0; - while (i<dat.size()) + for (j=0; j<D.getSize();j+=D.getqSize()) { - if ((i+1 == dat.size())|| dat[i]->qid!=dat[i+1]->qid) - { - ranksort(j,i,pred_rank,pred,orig); - for (int k=j;k<=i;++k) - if (orig[pred_rank[k]]>0) - { - LOG(INFO)<<"qid:"<<dat[pred_rank[k]]->qid<<"; pred:"<<pred[k]<<"; rank:"<< k-j; - cmc.addEntry(k-j); - break; // account only for the first match; - } - j = i+1; - ++cnt; - } - ++i; + i=j+D.getqSize()-1; + ranksort(j,i,pred_rank,pred,orig); + for (int k=j;k<=i;++k) + if (orig[pred_rank[k]]>0) + { + LOG(INFO)<<"qid:"<<D.getQid(j)<<"; pred:"<<pred[k]<<"; rank:"<< k-j; + cmc.addEntry(k-j); + break; // account only for the first match; + } + j = i+1; } }
\ No newline at end of file diff --git a/model/rankaccu.h b/model/rankaccu.h index bcb8906..832a9f5 100644 --- a/model/rankaccu.h +++ b/model/rankaccu.h @@ -36,8 +36,8 @@ public: } }; -void rank_CMC(DataList &D,const std::vector<double> pred,CMC & cmc); +void rank_CMC(RidList &D,const std::vector<double> pred,CMC & cmc); -void rank_accu(DataList &D,const std::vector<double> pred); +void rank_accu(RidList &D,const std::vector<double> pred); #endif //RANKSVM_RANKACCU_H diff --git a/model/ranksvm.h b/model/ranksvm.h index 9051343..20bb35a 100644 --- a/model/ranksvm.h +++ b/model/ranksvm.h @@ -26,7 +26,7 @@ protected: int fsize; public: virtual int train(RidList &D)=0; - virtual int predict(DataList &D,std::vector<double> &res)=0; + virtual int predict(RidList &D,std::vector<double> &res)=0; // TODO Not sure how to construct this // Possible solution: generate a nxn matrix each row contains the sorted list of ranker result. int saveModel(const std::string fname); diff --git a/model/ranksvmtn.cpp b/model/ranksvmtn.cpp index 1414c81..f904fdd 100644 --- a/model/ranksvmtn.cpp +++ b/model/ranksvmtn.cpp @@ -7,7 +7,7 @@ using namespace std; using namespace Eigen; // Main terminating criteria -const int maxiter = 40; // max iteration count +const int maxiter = 50; // max iteration count const double prec=1e-10; // precision // conjugate gradient const double cg_prec=1e-10; // precision @@ -17,14 +17,21 @@ const int ls_maxiter = 10; const double line_prec=1e-10; // precision const double line_turb=1e-15; // purturbation +void cal_Dw(RidList &D,const VectorXd &w, VectorXd &Dw) +{ + int n = D.getSize(); + #pragma omp parallel for + for (int i=0;i<n;++i) + Dw(i) = D.getVec(i).dot(w); +} + int cal_Hs(RidList &D,const vector<int> &rank,const VectorXd &corr,const VectorXd &alpha,const VectorXd s,VectorXd &Hs) { int n = D.getSize(); int q = D.getqSize(); Hs = VectorXd::Zero(s.rows()); VectorXd Ds(n); - for (int i=0;i<n;++i) - Ds(i) = D.getVec(i).dot(s); + cal_Dw(D,s,Ds); VectorXd gamma(n); for (int i=0;i<n;) { @@ -153,8 +160,7 @@ int line_search(const VectorXd &w,RidList &D,const VectorXd &corr,const VectorXd { int n=D.getSize(); VectorXd Dd(n); - for (int i=0;i<n;++i) - Dd(i) = D.getVec(i).dot(step); + cal_Dw(D,step,Dd); VectorXd alpha,beta,yt; VectorXd grad; VectorXd Hs; @@ -167,8 +173,7 @@ int line_search(const VectorXd &w,RidList &D,const VectorXd &corr,const VectorXd while (1) { grad=w+t*step; - for (int i=0;i<n;++i) - Dd(i) = D.getVec(i).dot(grad); + cal_Dw(D,grad,Dd); cal_alpha_beta(Dd,corr,D,rank,yt,alpha,beta); VectorXd tmp = alpha.cwiseProduct(yt)-beta; VectorXd res = 0*grad; @@ -209,8 +214,7 @@ int train_orig(int fsize, RidList &Data,const VectorXd &corr,VectorXd &weight){ VectorXd alpha,beta; while (true) { - for (int i=0;i<n;++i) - dw(i) = Data.getVec(i).dot(weight); + cal_Dw(Data,weight,dw); cal_alpha_beta(dw,corr,Data,rank,yt,alpha,beta); // Generate support vector matrix sv & gradient obj = (weight.dot(weight) + C*(alpha.dot(yt.cwiseProduct(yt))-beta.dot(yt)))/2; @@ -251,9 +255,9 @@ int RSVMTN::train(RidList &D){ return 0; }; -int RSVMTN::predict(DataList &D, vector<double> &res){ +int RSVMTN::predict(RidList &D, vector<double> &res){ res.clear(); for (int i=0;i<D.getSize();++i) - res.push_back(((D.getData()[i])->feature).dot(model.weight)); + res.push_back(D.getVec(i).dot(model.weight)); return 0; };
\ No newline at end of file diff --git a/model/ranksvmtn.h b/model/ranksvmtn.h index c98e581..97579b3 100644 --- a/model/ranksvmtn.h +++ b/model/ranksvmtn.h @@ -13,7 +13,7 @@ public: return "TN"; }; virtual int train(RidList &D); - virtual int predict(DataList &D,std::vector<double> &res); + virtual int predict(RidList &D,std::vector<double> &res); }; #endif
\ No newline at end of file diff --git a/tools/dataProvider.h b/tools/dataProvider.h index 586965e..59b989a 100644 --- a/tools/dataProvider.h +++ b/tools/dataProvider.h @@ -82,9 +82,23 @@ public: else uniq.push_back(d); } + inline DataEntry* getU(int x) + { + return uniq[x]; + } + inline DataEntry* getO(int x) + { + return other[x]; + } + inline std::string getQid(int x) + { + int a,b,n=getqSize(); + a=x/n; + return getU(a)->qid; + } inline int getqSize() { - return (int)(uniq.size()+other.size()-1); + return (int)other.size(); } inline int getuSize() { @@ -99,24 +113,14 @@ public: a=x/n; b=x%n; Eigen::VectorXd vec; - if (b<a) - vec=uniq[a]->feature-uniq[b]->feature; - else - if (b<uniq.size()-1) - vec=uniq[a]->feature-uniq[b+1]->feature; - else - vec=uniq[a]->feature-other[b-uniq.size()+1]->feature; - return vec.cwiseAbs(); + return (uniq[a]->feature-other[b]->feature).cwiseAbs(); }; inline double getL(int x){ int a,b,n=getqSize(); a=x/n; b=x%n; - if (b<uniq.size()-1) - return -1; - else - if (std::fabs(other[b-uniq.size()+1]->rank - a) < 1e-5) - return 1; + if (std::fabs(other[b]->rank - a) < 1e-5) + return 1; return -1; }; }; @@ -43,7 +43,7 @@ int predict(DataProvider &dp) { rsvm = RSVM::loadModel(vm["model"].as<string>().c_str()); dp.open(); - DataList D; + RidList D; vector<double> L; CMC cmc; LOG(INFO)<<"Prediction started"; @@ -59,24 +59,21 @@ int predict(DataProvider &dp) { else ot=&cout; - while (!dp.EOFile()) - { - dp.getDataSet(D); - LOG(INFO)<<"Read "<<D.getSize()<<" entries with "<< D.getfSize()<<" features"; - rsvm->predict(D,L); - - if (vm.count("validate")) - { - rank_accu(D,L); - if (vm.count("cmc")) - rank_CMC(D,L,cmc); - } + dp.getAllDataSet(D); + LOG(INFO)<<"Read "<<D.getSize()<<" entries with "<< D.getfSize()<<" features"; + rsvm->predict(D,L); - if (vm.count("output") && vm.count("predict")) - for (int i=0; i<L.size();++i) - *ot<<L[i]<<endl; + if (vm.count("validate")) + { + rank_accu(D,L); + if (vm.count("cmc")) + rank_CMC(D,L,cmc); } + if (vm.count("output") && vm.count("predict")) + for (int i=0; i<L.size();++i) + *ot<<L[i]<<endl; + LOG(INFO)<<"Finished"; if (vm.count("cmc")) { |