From 52ca742e14e36cd37010bae9a7678dd5b1a7eaf5 Mon Sep 17 00:00:00 2001
From: Joe Zhao
Date: Wed, 13 May 2015 18:56:39 +0800
Subject: branched for mem

---
 model/ranksvm.h            |   2 +-
 model/ranksvmtn.cpp        | 126 ++++++++++++++++++++++-----------------------
 model/ranksvmtn.h          |   2 +-
 tools/dataProvider.h       |  81 +++++++++++++++++++++++------
 tools/fileDataProvider.cpp |  11 ++++
 tools/fileDataProvider.h   |   2 +
 train.cpp                  |   4 +-
 7 files changed, 144 insertions(+), 84 deletions(-)

diff --git a/model/ranksvm.h b/model/ranksvm.h
index aa5e1ca..58cd7f0 100644
--- a/model/ranksvm.h
+++ b/model/ranksvm.h
@@ -25,7 +25,7 @@ protected:
     SVMModel model;
     int fsize;
 public:
-    virtual int train(DataList &D)=0;
+    virtual int train(RidList &D)=0;
     virtual int predict(DataList &D,std::vector<double> &res)=0;
     // TODO Not sure how to construct this
     // Possible solution: generate an nxn matrix; each row contains the sorted list of ranker results.

diff --git a/model/ranksvmtn.cpp b/model/ranksvmtn.cpp
index 01d9851..d3ef3af 100644
--- a/model/ranksvmtn.cpp
+++ b/model/ranksvmtn.cpp
@@ -17,37 +17,44 @@
 const int cg_maxiter = 30;
 const double line_prec=1e-10; // precision
 const double line_turb=1e-15; // perturbation
 
-int cal_Hs(const MatrixXd &D,const vector<int> &rank,const VectorXd &corr,const VectorXd &alpha,const vector<int> &A1,const vector<int> &A2,const VectorXd s,VectorXd &Hs)
+int cal_Hs(RidList &D,const vector<int> &rank,const VectorXd &corr,const VectorXd &alpha,const VectorXd s,VectorXd &Hs)
 {
     Hs = VectorXd::Zero(s.rows());
-    VectorXd Ds=D*s;
-    VectorXd gamma(D.rows());
-    for (int i=0;i<A1.size();i++)
+    VectorXd Ds(D.getSize());
+    for (int i=0;i<D.getSize();++i)
+        Ds[i] = D.getVec(i).dot(s);
+    VectorXd gamma(D.getSize());
+    for (int i=0;i<D.getSize();)
     {
         double g=0;
-        for (int j = A1[i];j<=A2[i];++j)
-            if (corr[rank[j]]<0)
-                gamma[rank[j]]=g;
+        for (int j = D.getqSize()-1;j>=0;--j)
+            if (corr[rank[i+j]]>0)
+                gamma[rank[i+j]]=g;
             else
-                g+=Ds[rank[j]];
+                g+=Ds[rank[i+j]];
         g=0;
-        for (int j = A2[i];j>=A1[i];--j)
-            if (corr[rank[j]]>0)
-                gamma[rank[j]]=g;
+        i+=D.getqSize();
+        for (int j = D.getqSize();j>0;--j)
+            if (corr[rank[i-j]]<0)
+                gamma[rank[i-j]]=g;
             else
-                g+=Ds[rank[j]];
+                g+=Ds[rank[i-j]];
     }
-    Hs = s + C*(D.transpose()*(alpha.cwiseProduct(Ds) - gamma));
+    VectorXd tmp = alpha.cwiseProduct(Ds)-gamma;
+    VectorXd res = 0*s;
+    for (int i=0;i<D.getSize();++i)
+        res = res + D.getVec(i)*tmp[i];
+    Hs = s + C*res;
     return 0;
 }
 
-int cg_solve(const MatrixXd &D,const vector<int> &rank,const VectorXd &corr,const VectorXd &alph,const vector<int> &A1,const vector<int> &A2,const VectorXd &b, VectorXd &x)
+int cg_solve(RidList &D,const vector<int> &rank,const VectorXd &corr,const VectorXd &alph,const VectorXd &b, VectorXd &x)
 {
     double alpha,beta,r_1,r_2;
     int iter=0;
     VectorXd q;
     VectorXd Hs;
-    cal_Hs(D,rank,corr,alph,A1,A2,x,Hs);
+    cal_Hs(D,rank,corr,alph,x,Hs);
     VectorXd res = b - Hs;
     VectorXd p = res;
     while (1)
@@ -65,7 +72,7 @@ int cg_solve(const MatrixXd &D,const vector<int> &rank,const VectorXd &corr,cons
             beta = r_1 / r_2;
             p = res + p * beta;
         }
-        cal_Hs(D,rank,corr,alph,A1,A2,p,q);
+        cal_Hs(D,rank,corr,alph,p,q);
         alpha = r_1/p.dot(q);
         x=x+p*alpha;
         res=res-q*alpha;
@@ -98,18 +105,19 @@ void ranksort(int l,int r,vector<int> &rank,VectorXd &ref)
         ranksort(i,r,rank,ref);
 }
 
-int cal_alpha_beta(const VectorXd &dw,const VectorXd &corr,const vector<int> &A1,const vector<int> &A2,vector<int> &rank,VectorXd &yt,VectorXd &alpha,VectorXd &beta)
+int cal_alpha_beta(const VectorXd &dw,const VectorXd &corr,RidList &D,vector<int> &rank,VectorXd &yt,VectorXd &alpha,VectorXd &beta)
 {
     long n = dw.rows();
     yt = dw - corr;
     alpha=VectorXd::Zero(n);
     beta=VectorXd::Zero(n);
     for (int i=0;i<n;++i)
         rank[i]=i;
-    for (int i=0;i<A1.size();++i)
+    for (int i=0;i<n;i+=D.getqSize())
     {
-        ranksort(A1[i],A2[i],rank,yt);
+        int ed=i+D.getqSize()-1;
+        ranksort(i,ed,rank,yt);
         double a=0,b=0;
@@ -120,12 +128,12 @@ int cal_alpha_beta(const VectorXd &dw,const VectorXd &corr,const vector<int> &A1
-        for (int j=A1[i];j<=A2[i];++j)
+        for (int j=i;j<=ed;++j)
             if (corr[rank[j]]<0)
                 beta[rank[j]]=b;
             else
                 b+=yt[rank[j]];
         a=b=0;
-        for (int j=A2[i];j>=A1[i];--j)
+        for (int j=ed;j>=i;--j)
             if (corr[rank[j]]>0)
             {
                 alpha[rank[j]]=a;
@@ -136,28 +144,33 @@ int cal_alpha_beta(const VectorXd &dw,const VectorXd &corr,const vector<int> &A1
 }
 
 // line search using Newton's method
-int line_search(const VectorXd &w,const MatrixXd &D,const VectorXd &corr,const vector<int> &A1,const vector<int> &A2,const VectorXd &step,double &t)
+int line_search(const VectorXd &w,RidList &D,const VectorXd &corr,const VectorXd &step,double &t)
 {
-    VectorXd Dd = D*step;
-    VectorXd Xd = VectorXd::Zero(A1.size());
-    int n=D.rows();
-    vector<int> rank(D.rows());
+    VectorXd Dd(D.getSize());
+    for (int i=0;i<D.getSize();++i)
+        Dd[i] = D.getVec(i).dot(step);
+    int n=D.getSize();
+    vector<int> rank(D.getSize());
     int iter = 0;
     for (int i=0;i<n;++i)
         rank[i]=i;
@@ -174,15 +187,15 @@ int line_search(const VectorXd &w,RidList &D,const VectorXd &corr,const VectorXd
     return 0;
 }
 
-int train_orig(int fsize, MatrixXd &D,const vector<int> &A1,const vector<int> &A2,const VectorXd &corr,VectorXd &weight){
+int train_orig(int fsize, RidList &Data,const VectorXd &corr,VectorXd &weight){
     int iter = 0;
-    long n=D.rows();
-    LOG(INFO) << "training with feature size:" << fsize << " Data size:" << n << " Query size:" << A1.size();
+    long n=Data.getSize();
+    LOG(INFO) << "training with feature size:" << fsize << " Data size:" << n << " Query size:" << Data.getuSize();
     VectorXd grad(fsize);
     VectorXd step(fsize);
     vector<int> rank(n);
     double obj,t;
-    VectorXd dw = D*weight;
+    VectorXd dw(n);
     VectorXd yt;
     VectorXd alpha,beta;
     while (true)
@@ -196,16 +209,21 @@ int train_orig(int fsize, MatrixXd &D,const vector<int> &A1,const vector<int> &A
             break;
         }
 
-        dw = D*weight;
-        cal_alpha_beta(dw,corr,A1,A2,rank,yt,alpha,beta);
-        grad = weight + C*(D.transpose()*(alpha.cwiseProduct(dw) - beta));
-        cg_solve(D,rank,corr,alpha,A1,A2,grad,step);
-        line_search(weight,D,corr,A1,A2,step,t);
+        for (int i=0;i<n;++i)
+            dw[i] = Data.getVec(i).dot(weight);
+        cal_alpha_beta(dw,corr,Data,rank,yt,alpha,beta);
+        VectorXd tmp = alpha.cwiseProduct(dw)-beta;
+        grad = 0*weight;
+        for (int i=0;i<n;++i)
+            grad = grad + Data.getVec(i)*tmp[i];
+        grad = weight + C*grad;
+        cg_solve(Data,rank,corr,alpha,grad,step);
+        line_search(weight,Data,corr,step,t);
         weight=weight+step*t;
@@ -227,31 +245,14 @@ int train_orig(int fsize, MatrixXd &D,const vector<int> &A1,const vector<int> &A
     return 0;
 }
 
-int RSVMTN::train(DataList &D){
-    MatrixXd Data(D.getSize(),D.getfSize());
+int RSVMTN::train(RidList &D){
     VectorXd corr(D.getSize());
-    vector<int> A1,A2;
     int i,j;
     LOG(INFO)<<"Processing input";
-    vector<DataEntry*> &dat = D.getData();
-    for (i=0;i<D.getSize();++i) {
-        corr(i) = dat[i]->rank>0?0.5:-0.5;
-
-        for (j = 0; j < D.getfSize(); ++j){
-            Data(i, j) = dat[i]->feature(j);}
-
-    }
-    i=j=0;
-    while (i<D.getSize())
-    {
-        if (i==D.getSize()-1 || dat[i]->qid!=dat[i+1]->qid)
-        {
-            A1.push_back(j);
-            A2.push_back(i);
-            j = i+1;
-        }
-        ++i;
-    }
-    train_orig(fsize,Data,A1,A2,corr,model.weight);
+    for (i=0;i<D.getSize();++i)
+        corr(i) = D.getL(i)>0?0.5:-0.5;
+    train_orig(fsize,D,corr,model.weight);
     return 0;
 };
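Every hunk in model/ranksvmtn.cpp above follows one pattern: a product with the materialized pair matrix D becomes a loop that regenerates each pair's feature vector on demand, so working memory drops from the full n x fsize matrix to a single row. A minimal sketch of the two primitives, assuming Eigen and the RidList interface introduced in tools/dataProvider.h below; the helper names expandedDot and expandedTransposeDot are illustrative, not part of the patch:

    // Sketch only: D*w and D^T*t evaluated without storing D.
    #include <Eigen/Dense>
    #include "tools/dataProvider.h"  // RidList, as added by this patch
    using Eigen::VectorXd;

    // dw = D*w, one regenerated row per virtual pair.
    VectorXd expandedDot(RidList &D, const VectorXd &w)
    {
        VectorXd dw(D.getSize());
        for (int i = 0; i < D.getSize(); ++i)
            dw[i] = D.getVec(i).dot(w);   // row i is built on the fly
        return dw;
    }

    // res = D^T*t, accumulated as a sum of scaled rows.
    VectorXd expandedTransposeDot(RidList &D, const VectorXd &t)
    {
        VectorXd res = VectorXd::Zero(D.getfSize());
        for (int i = 0; i < D.getSize(); ++i)
            res += D.getVec(i) * t[i];
        return res;
    }

cal_Hs, line_search and train_orig are these two loops inlined; the trade is CPU for memory, since each feature difference is recomputed on every conjugate-gradient pass instead of being read back from a cached matrix.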
diff --git a/model/ranksvmtn.h b/model/ranksvmtn.h
index 4074781..c98e581 100644
--- a/model/ranksvmtn.h
+++ b/model/ranksvmtn.h
@@ -12,7 +12,7 @@ public:
     {
         return "TN";
     };
-    virtual int train(DataList &D);
+    virtual int train(RidList &D);
     virtual int predict(DataList &D,std::vector<double> &res);
 };

diff --git a/tools/dataProvider.h b/tools/dataProvider.h
index 028980e..a3f3d34 100644
--- a/tools/dataProvider.h
+++ b/tools/dataProvider.h
@@ -55,6 +55,70 @@ public:
     }
 };
 
+class RidList{
+private:
+    int n;
+    std::vector<DataEntry*> uniq;
+    std::vector<DataEntry*> other;
+public:
+    void clear(){
+        uniq.clear();
+        other.clear();
+    }
+    void setfSize(int fsize){n=fsize;}
+    int getfSize(){return n;}
+    void addEntry(DataEntry* d){
+        bool ext=false;
+        for (int i=0;i<(int)uniq.size();++i)
+            if (uniq[i]->qid==d->qid)
+            {
+                ext = true;
+                d->rank = i;
+            }
+        if (ext)
+            other.push_back(d);
+        else
+            uniq.push_back(d);
+    }
+    int getqSize()
+    {
+        return (int)(uniq.size()+other.size()-1);
+    }
+    int getuSize()
+    {
+        return (int)uniq.size();
+    }
+    int getSize()
+    {
+        return getuSize()*getqSize();
+    }
+    Eigen::VectorXd getVec(int x){
+        int a,b,n=getqSize();
+        a=x/n;
+        b=x%n;
+        Eigen::VectorXd vec = uniq[a]->feature;
+        if (b<a)
+            vec=vec-uniq[b]->feature;
+        else
+            if (b<(int)uniq.size()-1)
+                vec=vec-uniq[b+1]->feature;
+            else
+                vec=vec-other[b-uniq.size()+1]->feature;
+        return vec.cwiseAbs();
+    };
+    double getL(int x){
+        int a,b,n=(int)(uniq.size()+other.size()-1);
+        a=x/n;
+        b=x%n;
+        if (b>=(int)uniq.size()-1 && std::abs(other[b-uniq.size()+1]->rank - a) < 1e-5)
+            return 1;
+        return -1;
+    };
+};
+
 class DataProvider //Virtual base class for data input
 {
 protected:
@@ -63,22 +127,7 @@ public:
     DataProvider():eof(false){};
     bool EOFile(){return eof;}
 
-    void getAllDataSet(DataList &out){\
-        out.clear();
-        DataList buf;
-        while (!EOFile())
-        {
-            getDataSet(buf);
-            // won't work as data are discarded with every call to getDataSet
-            // out.getData().insert(out.getData().end(),buf.getData().begin(),buf.getData().end());
-            for (int i=0;i<buf.getSize();++i)
-                out.addEntry(new DataEntry(*(buf.getData()[i])));
-        }
-        out.setfSize(buf.getfSize());
-    }
-
     virtual int getDataSet(DataList &out) = 0;
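The class stores only the distinct query entries (uniq) and the remaining clicked entries (other); the pairs the trainer iterates over are purely virtual. A flat index x decodes as query a = x/qSize and candidate b = x%qSize, where each query's candidate list is every stored entry except its own uniq row. A small sketch of the decoding rule, under the same layout assumptions as the class above; decodePair is illustrative, not part of the patch:

    // Sketch only: RidList's flat-index convention.
    //   qSize = uniq.size() + other.size() - 1 candidates per query
    //   b <  a                  -> uniq[b]
    //   a <= b < uniq.size()-1  -> uniq[b+1]   (skips uniq[a] itself)
    //   b >= uniq.size()-1      -> other[b - uniq.size() + 1]
    struct PairRef { int query; int candidate; };

    PairRef decodePair(int x, int qSize)
    {
        return PairRef{ x / qSize, x % qSize };
    }

getVec then returns the element-wise absolute difference between the query's feature vector and the candidate's, and getL labels a pair +1 only when the candidate is an other entry whose rank field points back at the query it was filed under by addEntry.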
diff --git a/tools/fileDataProvider.cpp b/tools/fileDataProvider.cpp
--- a/tools/fileDataProvider.cpp
+++ b/tools/fileDataProvider.cpp
@@ -58,4 +58,15 @@ void split(vector<DataEntry*> &a,vector<DataEntry*> &b)
         b.push_back(tmp[i]);
     scrambler(a);
     scrambler(b);
 }
+
+void RidFileDP::getAllDataSet(RidList &out){
+    DataEntry *e;
+    if (!read)
+        readEntries();
+    out.clear();
+    std::vector<DataEntry*> &dat = d.getData();
+    for (int i=0;i<d.getSize();++i)
+        out.addEntry(dat[i]);
+    out.setfSize(d.getfSize());
+}

diff --git a/tools/fileDataProvider.h b/tools/fileDataProvider.h
--- a/tools/fileDataProvider.h
+++ b/tools/fileDataProvider.h
@@ -15,4 +15,6 @@ public:
     virtual int getDataSet(DataList &out);
+    void getAllDataSet(RidList &out);
+

diff --git a/train.cpp b/train.cpp
--- a/train.cpp
+++ b/train.cpp
@@ -26,11 +26,9 @@
     dp.open();
-    DataList D;
+    RidList D;
 
     LOG(INFO)<<"Training started";
     dp.getAllDataSet(D);
     LOG(INFO)<<"Read "<<D.getSize()<<" entries";
     rsvm->train(D);
-    vector<double> L;
-    rsvm->predict(D,L);
     LOG(INFO)<<"Training finished,saving model";
-- 
cgit v1.2.3-70-g09d2
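With those pieces in place, the driver in train.cpp no longer builds any per-pair structure. A minimal sketch of the resulting flow, assuming the RidFileDP provider and RSVMTN model from the files touched above; run_training is an illustrative wrapper, not code from this commit:

    #include "tools/fileDataProvider.h"  // RidFileDP, with getAllDataSet(RidList&)
    #include "model/ranksvmtn.h"         // RSVMTN, with train(RidList&)

    int run_training(RidFileDP &dp, RSVMTN &rsvm)
    {
        dp.open();
        RidList D;               // entries only; pairs stay virtual
        dp.getAllDataSet(D);     // hands raw DataEntry pointers to the RidList
        return rsvm.train(D);    // rows are regenerated on demand while training
    }

Dropping the predict call from train.cpp fits the same goal: predict still takes a DataList, and scoring the expanded pair list here would have rebuilt exactly the structure this commit removes.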