diff options
| -rw-r--r-- | CMakeLists.txt | 2 | ||||
| -rw-r--r-- | model/rankaccu.cpp | 81 | ||||
| -rw-r--r-- | model/rankaccu.h | 4 | ||||
| -rw-r--r-- | model/ranksvm.h | 2 | ||||
| -rw-r--r-- | model/ranksvmtn.cpp | 26 | ||||
| -rw-r--r-- | model/ranksvmtn.h | 2 | ||||
| -rw-r--r-- | tools/dataProvider.h | 32 | ||||
| -rw-r--r-- | train.cpp | 29 | 
8 files changed, 84 insertions, 94 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 180456c..0e356fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ INCLUDE_DIRECTORIES ( "/usr/include/eigen3" )  # Use Random Library for Random Number Generation -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fopenmp")  FIND_PACKAGE( Boost COMPONENTS program_options REQUIRED )  INCLUDE_DIRECTORIES( ${Boost_INCLUDE_DIR}) diff --git a/model/rankaccu.cpp b/model/rankaccu.cpp index 726f2c8..f998e93 100644 --- a/model/rankaccu.cpp +++ b/model/rankaccu.cpp @@ -70,79 +70,64 @@ void rankmerge(int l,int r,vector<int> &C,vector<int> &rank,const vector<double>      }  } -void rank_accu(DataList &D,const vector<double> pred) +void rank_accu(RidList &D,const vector<double> pred)  {      unsigned long n = D.getSize();      vector<int> orig_rank(n),pred_rank(n),C(n);      vector<double> orig(n); -    vector<DataEntry*> &dat = D.getData();      int i,j; -    for (i=0;i<dat.size();++i) +    for (i=0;i<D.getSize();++i)      {          orig_rank[i]=i;          pred_rank[i]=i; -        orig[i]=dat[i]->rank; +        orig[i]=D.getL(i);      } -    int cnt=0;      double accu_nDCG=0;      double accu_AP=0; -    i=j=0; -    while (i<dat.size()) +    for (j=0; j<D.getSize();j+=D.getqSize())      { -        if ((i+1 == dat.size())|| dat[i]->qid!=dat[i+1]->qid) +        i=j+D.getqSize()-1; +        double Y=0,Z=0; +        double AP=0; +        ranksort(j,i,orig_rank,orig,pred); +        ranksort(j,i,pred_rank,pred,orig); +        for (int k = j;k<=i;++k)          { -            double Y=0,Z=0; -            double AP=0; -            ranksort(j,i,orig_rank,orig,pred); -            ranksort(j,i,pred_rank,pred,orig); -            for (int k = j;k<=i;++k) -            { -                Z += (pow(2,offset+orig[orig_rank[k]]) - 1)/log2(2+k-j); -                Y += (pow(2,offset+orig[pred_rank[k]]) - 1)/log2(2+k-j); -            } -            accu_nDCG+=Y/Z; -            rankmerge(j,i,C,orig_rank,pred,orig); -            for (int k = j+1;k<=i;++k) -                AP += ((double)C[k])/(k-j); -            AP=AP*2/(i-j)-1; -            accu_AP+=AP; -            j = i+1; -            ++cnt; +            Z += (pow(2,offset+orig[orig_rank[k]]) - 1)/log2(2+k-j); +            Y += (pow(2,offset+orig[pred_rank[k]]) - 1)/log2(2+k-j);          } -        ++i; +        accu_nDCG+=Y/Z; +        rankmerge(j,i,C,orig_rank,pred,orig); +        for (int k = j+1;k<=i;++k) +            AP += ((double)C[k])/(k-j); +        AP=AP*2/(i-j)-1; +        accu_AP+=AP;      } -    LOG(INFO)<<"over "<< cnt<< " queries. "<<"Average nDGC: "<< accu_nDCG/cnt<< " Average AP: "<<accu_AP/cnt; +    LOG(INFO)<<"over "<< D.getuSize()<< " queries. "<<"Average nDGC: "<< accu_nDCG/D.getuSize()<< " Average AP: "<<accu_AP/D.getuSize();  } -void rank_CMC(DataList &D,const std::vector<double> pred,CMC & cmc) { +void rank_CMC(RidList &D,const std::vector<double> pred,CMC & cmc) {      unsigned long n = D.getSize();      vector<int> orig_rank(n),pred_rank(n);      vector<double> orig(n); -    vector<DataEntry*> &dat = D.getData();      int i,j; -    for (i=0;i<dat.size();++i) +    for (i=0;i<D.getSize();++i)      {          orig_rank[i]=i;          pred_rank[i]=i; -        orig[i]=dat[i]->rank; +        orig[i]=D.getL(i);      } -    int cnt=0; -    i=j=0; -    while (i<dat.size()) +    for (j=0; j<D.getSize();j+=D.getqSize())      { -        if ((i+1 == dat.size())|| dat[i]->qid!=dat[i+1]->qid) -        { -            ranksort(j,i,pred_rank,pred,orig); -            for (int k=j;k<=i;++k) -                if (orig[pred_rank[k]]>0) -                { -                    LOG(INFO)<<"qid:"<<dat[pred_rank[k]]->qid<<"; pred:"<<pred[k]<<"; rank:"<< k-j; -                    cmc.addEntry(k-j); -                    break; // account only for the first match; -                } -            j = i+1; -            ++cnt; -        } -        ++i; +        i=j+D.getqSize()-1; +        ranksort(j,i,pred_rank,pred,orig); +        for (int k=j;k<=i;++k) +            if (orig[pred_rank[k]]>0) +            { +                LOG(INFO)<<"qid:"<<D.getQid(j)<<"; pred:"<<pred[k]<<"; rank:"<< k-j; +                cmc.addEntry(k-j); +                break; // account only for the first match; +            } +        j = i+1;      }  }
\ No newline at end of file diff --git a/model/rankaccu.h b/model/rankaccu.h index bcb8906..832a9f5 100644 --- a/model/rankaccu.h +++ b/model/rankaccu.h @@ -36,8 +36,8 @@ public:      }  }; -void rank_CMC(DataList &D,const std::vector<double> pred,CMC & cmc); +void rank_CMC(RidList &D,const std::vector<double> pred,CMC & cmc); -void rank_accu(DataList &D,const std::vector<double> pred); +void rank_accu(RidList &D,const std::vector<double> pred);  #endif //RANKSVM_RANKACCU_H diff --git a/model/ranksvm.h b/model/ranksvm.h index 9051343..20bb35a 100644 --- a/model/ranksvm.h +++ b/model/ranksvm.h @@ -26,7 +26,7 @@ protected:      int fsize;  public:      virtual int train(RidList &D)=0; -    virtual int predict(DataList &D,std::vector<double> &res)=0; +    virtual int predict(RidList &D,std::vector<double> &res)=0;      // TODO Not sure how to construct this      //  Possible solution: generate a nxn matrix each row contains the sorted list of ranker result.      int saveModel(const std::string fname); diff --git a/model/ranksvmtn.cpp b/model/ranksvmtn.cpp index 1414c81..f904fdd 100644 --- a/model/ranksvmtn.cpp +++ b/model/ranksvmtn.cpp @@ -7,7 +7,7 @@ using namespace std;  using namespace Eigen;  // Main terminating criteria -const int maxiter = 40; // max iteration count +const int maxiter = 50; // max iteration count  const double prec=1e-10; // precision  // conjugate gradient  const double cg_prec=1e-10; // precision @@ -17,14 +17,21 @@ const int ls_maxiter = 10;  const double line_prec=1e-10; // precision  const double line_turb=1e-15; // purturbation +void cal_Dw(RidList &D,const VectorXd &w, VectorXd &Dw) +{ +    int n = D.getSize(); +    #pragma omp parallel for +    for (int i=0;i<n;++i) +        Dw(i) = D.getVec(i).dot(w); +} +  int cal_Hs(RidList &D,const vector<int> &rank,const VectorXd &corr,const VectorXd &alpha,const VectorXd s,VectorXd &Hs)  {      int n = D.getSize();      int q = D.getqSize();      Hs = VectorXd::Zero(s.rows());      VectorXd Ds(n); -    for (int i=0;i<n;++i) -        Ds(i) = D.getVec(i).dot(s); +    cal_Dw(D,s,Ds);      VectorXd gamma(n);      for (int i=0;i<n;)      { @@ -153,8 +160,7 @@ int line_search(const VectorXd &w,RidList &D,const VectorXd &corr,const VectorXd  {      int n=D.getSize();      VectorXd Dd(n); -    for (int i=0;i<n;++i) -        Dd(i) = D.getVec(i).dot(step); +    cal_Dw(D,step,Dd);      VectorXd alpha,beta,yt;      VectorXd grad;      VectorXd Hs; @@ -167,8 +173,7 @@ int line_search(const VectorXd &w,RidList &D,const VectorXd &corr,const VectorXd      while (1)      {          grad=w+t*step; -        for (int i=0;i<n;++i) -            Dd(i) = D.getVec(i).dot(grad); +        cal_Dw(D,grad,Dd);          cal_alpha_beta(Dd,corr,D,rank,yt,alpha,beta);          VectorXd tmp = alpha.cwiseProduct(yt)-beta;          VectorXd res = 0*grad; @@ -209,8 +214,7 @@ int train_orig(int fsize, RidList &Data,const VectorXd &corr,VectorXd &weight){      VectorXd alpha,beta;      while (true)      { -        for (int i=0;i<n;++i) -            dw(i) = Data.getVec(i).dot(weight); +        cal_Dw(Data,weight,dw);          cal_alpha_beta(dw,corr,Data,rank,yt,alpha,beta);          // Generate support vector matrix sv & gradient          obj = (weight.dot(weight) + C*(alpha.dot(yt.cwiseProduct(yt))-beta.dot(yt)))/2; @@ -251,9 +255,9 @@ int RSVMTN::train(RidList &D){      return 0;  }; -int RSVMTN::predict(DataList &D, vector<double> &res){ +int RSVMTN::predict(RidList &D, vector<double> &res){      res.clear();      for (int i=0;i<D.getSize();++i) -        res.push_back(((D.getData()[i])->feature).dot(model.weight)); +        res.push_back(D.getVec(i).dot(model.weight));      return 0;  };
\ No newline at end of file diff --git a/model/ranksvmtn.h b/model/ranksvmtn.h index c98e581..97579b3 100644 --- a/model/ranksvmtn.h +++ b/model/ranksvmtn.h @@ -13,7 +13,7 @@ public:          return "TN";      };      virtual int train(RidList &D); -    virtual int predict(DataList &D,std::vector<double> &res); +    virtual int predict(RidList &D,std::vector<double> &res);  };  #endif
\ No newline at end of file diff --git a/tools/dataProvider.h b/tools/dataProvider.h index 586965e..59b989a 100644 --- a/tools/dataProvider.h +++ b/tools/dataProvider.h @@ -82,9 +82,23 @@ public:          else              uniq.push_back(d);      } +    inline DataEntry* getU(int x) +    { +        return uniq[x]; +    } +    inline DataEntry* getO(int x) +    { +        return other[x]; +    } +    inline std::string getQid(int x) +    { +        int a,b,n=getqSize(); +        a=x/n; +        return getU(a)->qid; +    }      inline int getqSize()      { -        return (int)(uniq.size()+other.size()-1); +        return (int)other.size();      }      inline int getuSize()      { @@ -99,24 +113,14 @@ public:          a=x/n;          b=x%n;          Eigen::VectorXd vec; -        if (b<a) -            vec=uniq[a]->feature-uniq[b]->feature; -        else -        if (b<uniq.size()-1) -            vec=uniq[a]->feature-uniq[b+1]->feature; -        else -            vec=uniq[a]->feature-other[b-uniq.size()+1]->feature; -        return vec.cwiseAbs(); +        return (uniq[a]->feature-other[b]->feature).cwiseAbs();      };      inline double getL(int x){          int a,b,n=getqSize();          a=x/n;          b=x%n; -        if (b<uniq.size()-1) -            return -1; -        else -            if (std::fabs(other[b-uniq.size()+1]->rank - a) < 1e-5) -                return 1; +        if (std::fabs(other[b]->rank - a) < 1e-5) +            return 1;          return -1;      };  }; @@ -43,7 +43,7 @@ int predict(DataProvider &dp) {      rsvm = RSVM::loadModel(vm["model"].as<string>().c_str());      dp.open(); -    DataList D; +    RidList D;      vector<double> L;      CMC cmc;      LOG(INFO)<<"Prediction started"; @@ -59,24 +59,21 @@ int predict(DataProvider &dp) {      else          ot=&cout; -    while (!dp.EOFile()) -    { -        dp.getDataSet(D); -        LOG(INFO)<<"Read "<<D.getSize()<<" entries with "<< D.getfSize()<<" features"; -        rsvm->predict(D,L); - -        if (vm.count("validate")) -        { -            rank_accu(D,L); -            if (vm.count("cmc")) -                rank_CMC(D,L,cmc); -        } +    dp.getAllDataSet(D); +    LOG(INFO)<<"Read "<<D.getSize()<<" entries with "<< D.getfSize()<<" features"; +    rsvm->predict(D,L); -        if (vm.count("output") && vm.count("predict")) -            for (int i=0; i<L.size();++i) -                *ot<<L[i]<<endl; +    if (vm.count("validate")) +    { +        rank_accu(D,L); +        if (vm.count("cmc")) +            rank_CMC(D,L,cmc);      } +    if (vm.count("output") && vm.count("predict")) +        for (int i=0; i<L.size();++i) +            *ot<<L[i]<<endl; +      LOG(INFO)<<"Finished";      if (vm.count("cmc"))      {  | 
