diff options
| -rw-r--r-- | CMakeLists.txt | 1 | ||||
| -rw-r--r-- | README | 24 | ||||
| -rw-r--r-- | model/rankaccu.cpp | 3 | ||||
| -rw-r--r-- | model/ranksvm.h | 2 | ||||
| -rw-r--r-- | split.cpp | 10 | ||||
| -rw-r--r-- | tools/dataProvider.h | 23 | ||||
| -rw-r--r-- | tools/fileDataProvider.cpp | 1 | ||||
| -rw-r--r-- | tools/fileDataProvider.h | 6 | ||||
| -rw-r--r-- | tools/matrixIO.h | 2 | ||||
| -rw-r--r-- | train.cpp | 23 | 
10 files changed, 76 insertions, 19 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 97d548e..82aa7f4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,5 @@ INCLUDE_DIRECTORIES( ${Boost_INCLUDE_DIR})  set(SOURCE_FILES model/ranksvm.cpp model/ranksvmtn.cpp model/rankaccu.cpp tools/fileDataProvider.cpp)  add_executable(ranksvm train.cpp ${SOURCE_FILES} model/rankaccu.h model/ranksvm.h model/ranksvmtn.h tools/dataProvider.h tools/matrixIO.h tools/fileDataProvider.h tools/dataProvider.cpp model/rankmisc.h)  add_executable(split split.cpp ${SOURCE_FILES} tools/dataProvider.cpp) -add_dependencies(ranksvm split)  TARGET_LINK_LIBRARIES( ranksvm ${Boost_LIBRARIES} )  TARGET_LINK_LIBRARIES( split ${Boost_LIBRARIES})
\ No newline at end of file @@ -0,0 +1,24 @@ +RankSVM Training, validating, & predicting +========================================== + +Acknowledgement: +Logging is provided by easylogging++ +Up-to-date site: https://github.com/easylogging/easyloggingpp + +Requirements: +CMake, +GCC (C++) or any C++11-compatible compiler, +Boost libraries (program_options), +Eigen3. + +How to build (Release): +1. Copy the source files to an empty directory +2. mkdir Release +3. cd Release +# In the Release directory +4. cmake -DCMAKE_BUILD_TYPE=Release .. +# Change Release to Debug for a Debug build + +How to build it under Windows (untested): +1. Install & configure: Eigen3 & Boost +2. Use CMake or VS
\ No newline at end of file diff --git a/model/rankaccu.cpp b/model/rankaccu.cpp index 0f55e26..caa3c5a 100644 --- a/model/rankaccu.cpp +++ b/model/rankaccu.cpp @@ -102,6 +102,7 @@ void rank_accu(RidList &D,const vector<double> pred)              AP += ((double)C[k])/(k-j);          AP=AP*2/(i-j)-1;          accu_AP+=AP; +        LOG(INFO)<<"qid:"<<D.getQid(j)<<"; nDCG:"<<Y/Z<<"; AP:"<< AP;      }      LOG(INFO)<<"over "<< D.getuSize()<< " queries. "<<"Average nDGC: "<< accu_nDCG/D.getuSize()<< " Average AP: "<<accu_AP/D.getuSize();  } @@ -127,6 +128,8 @@ void rank_CMC(RidList &D,const std::vector<double> pred,CMC & cmc) {                  cmc.addEntry(k-j);                  break; // account only for the first match;              } +        LOG(INFO)<<"top: "<< D.getO(pred_rank[j]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+1]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+2]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+3]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+4]%D.getqSize())->qid +                 <<" "<< D.getO(pred_rank[j+5]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+6]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+7]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+8]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+9]%D.getqSize())->qid;      }  } diff --git a/model/ranksvm.h b/model/ranksvm.h index a17e3c9..a19ad63 100644 --- a/model/ranksvm.h +++ b/model/ranksvm.h @@ -27,8 +27,6 @@ protected:  public:      virtual int train(RidList &D)=0;      virtual int predict(RidList &D,std::vector<double> &res)=0; -    // TODO Not sure how to construct this -    //  Possible solution: generate a nxn matrix each row contains the sorted list of ranker result.      int saveModel(const std::string fname);      static RSVM* loadModel(const std::string fname);      virtual std::string getName()=0; @@ -1,6 +1,10 @@ -// -// Created by joe on 5/13/15. 
-// +/* + * split: helper program to split rid file + *  usage: ./split -h to see all options + *  support: + *      shuffling + *      splitting + */  #include <iostream>  #include <boost/program_options.hpp> diff --git a/tools/dataProvider.h b/tools/dataProvider.h index eed3079..891be86 100644 --- a/tools/dataProvider.h +++ b/tools/dataProvider.h @@ -6,15 +6,12 @@  #include<vector>  #include<math.h> -// TODO decide how to construct training data -// One possible way for training data: -//  Matrix composed of an array of feature vectors -//  Labels are composed of linked list, such as -//      6,3,4,0,5,0,0 -//      =>  0->6 | 1->3 | 2->4->5 -//  How to compensate for non exhaustive labeling? -//      Use -1 to indicate not yet labeled data -//      -1s will be excluded from training +// Training data(Rid): +//  First line: the total number of features(fsize) +//  from the second line to last-1 +//      nametag(string) fsize number of features for nametag +//          possible: "-1" nametag mean don't care(partially implemented, not sure) +//  last line(delimiter to terminate data read): 0  typedef struct DataEntry{      std::string qid; @@ -70,7 +67,6 @@ public:          all.clear();      }      void setfSize(int fsize){n=fsize;} -    inline int getfSize(){return n;}      void addEntry(DataEntry* d){          int ext=false;          all.push_back(d); @@ -92,6 +88,8 @@ public:              d->rank=uniq.size()-1;          }      } +    // A lot of getters +    inline int getfSize(){return n;}      inline DataEntry* getU(int x)      {          return uniq[x]; @@ -182,9 +180,11 @@ public:          }          return res;      } +    // master cal -> possible multiplexing      inline double cal(Eigen::VectorXd *id,Eigen::VectorXd *oth,int i) {          return fabs((*id)[i] - (*oth)[i]);      } +    // TODO getvec as VectorXd -> deprecating due to performance issues      inline Eigen::VectorXd getVec(int x){          int a,b,q=getqSize();          a=x/q; @@ -207,6 
+207,7 @@ public:              res(i)=cal(id,oth,i);          return res;      }; +    // w*Vec -> linear factor      inline double getVecDot(int x,const Eigen::VectorXd &w)      {          int a,b,q=getqSize(); @@ -230,6 +231,7 @@ public:              res += cal(id,oth,i)*w[i];          return res;      } +    // w*Vec -> linear factor      inline void addVecw(int x,double w,Eigen::VectorXd &X)      {          int a,b,q=getqSize(); @@ -251,6 +253,7 @@ public:          for (int i=0;i<n;++i)              X[i] += cal(id,oth,i)*w;      } +    // get label of vector x      inline double getL(int x){          int a,b,q=getqSize();          a=x/q; diff --git a/tools/fileDataProvider.cpp b/tools/fileDataProvider.cpp index 2b52dc7..a0cbf9a 100644 --- a/tools/fileDataProvider.cpp +++ b/tools/fileDataProvider.cpp @@ -8,6 +8,7 @@  using namespace std; +// Random generator  mt19937 gen;  int FileDP::getDataSet(DataList &out){ diff --git a/tools/fileDataProvider.h b/tools/fileDataProvider.h index 0ab1948..1c40d6e 100644 --- a/tools/fileDataProvider.h +++ b/tools/fileDataProvider.h @@ -1,3 +1,6 @@ +// File Data Provider +// Any kind of dataprovider that reads from file +  #ifndef FDPROV_H  #define FDPROV_H @@ -8,7 +11,7 @@  #include <fstream>  // Rank qid features - +// Deprecated due to algorithm update  class FileDP:public DataProvider  {  private: @@ -23,7 +26,6 @@ public:  };  // label features -  class RidFileDP:public DataProvider  {  private: diff --git a/tools/matrixIO.h b/tools/matrixIO.h index 88cd419..ea5f85f 100644 --- a/tools/matrixIO.h +++ b/tools/matrixIO.h @@ -1,3 +1,5 @@ +// Some helper to read&write matrices data +  #ifndef MATIO_H  #define MATIO_H @@ -1,3 +1,24 @@ +/* + * ranksvm: main program + *  usage: ./ranksvm -h to see all options + *  support: + *      training + *      validating + *      predicting + *  model: + *      TN  RankSVM(truncated newton, conjugate gradient, various opt) + *      BH  bhat-dist + *      HE  Hell-dist(but output chance 
instead?!) + *  out features: + *      cmc + *          Cumulative Matching Characteristic + *      avg + *          Normalized avg rank + *      predict + *          image pair relevance value + */ + +  #include <iostream>  #include <Eigen/Dense>  #include <boost/program_options.hpp> @@ -190,7 +211,7 @@ int main(int argc, char **argv) {      else return 0;      DataProvider* dp;      if (vm["feature"].as<string>().find(".rid") == string::npos) -        LOG(FATAL)<<"Format not supported"; +        LOG(FATAL)<<"Format no longer supported";      else      {          RidFileDP* tmpdp = new RidFileDP(vm["feature"].as<string>());  | 
