From 60881b380b02637c27497c4508faf2345a534679 Mon Sep 17 00:00:00 2001
From: Joe Zhao <ztuowen@gmail.com>
Date: Sat, 27 Jun 2015 20:42:23 +0800
Subject: comments & readme

---
 CMakeLists.txt             |  1 -
 README                     | 24 ++++++++++++++++++++++++
 model/rankaccu.cpp         |  3 +++
 model/ranksvm.h            |  2 --
 split.cpp                  | 10 +++++++---
 tools/dataProvider.h       | 23 +++++++++++++----------
 tools/fileDataProvider.cpp |  1 +
 tools/fileDataProvider.h   |  6 ++++--
 tools/matrixIO.h           |  2 ++
 train.cpp                  | 23 ++++++++++++++++++++++-
 10 files changed, 76 insertions(+), 19 deletions(-)
 create mode 100644 README
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 97d548e..82aa7f4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -14,6 +14,5 @@ INCLUDE_DIRECTORIES( ${Boost_INCLUDE_DIR})
 set(SOURCE_FILES model/ranksvm.cpp model/ranksvmtn.cpp model/rankaccu.cpp tools/fileDataProvider.cpp)
 add_executable(ranksvm train.cpp ${SOURCE_FILES} model/rankaccu.h model/ranksvm.h model/ranksvmtn.h tools/dataProvider.h tools/matrixIO.h tools/fileDataProvider.h tools/dataProvider.cpp model/rankmisc.h)
 add_executable(split split.cpp ${SOURCE_FILES} tools/dataProvider.cpp)
-add_dependencies(ranksvm split)
 TARGET_LINK_LIBRARIES( ranksvm ${Boost_LIBRARIES} )
 TARGET_LINK_LIBRARIES( split ${Boost_LIBRARIES})
\ No newline at end of file
diff --git a/README b/README
new file mode 100644
index 0000000..54d8f50
--- /dev/null
+++ b/README
@@ -0,0 +1,24 @@
+RankSVM Training, validating, & predicting
+==========================================
+
+Acknowledgement:
+Logging is provided by easylogging++
+up-to-date site: https://github.com/easylogging/easyloggingpp
+
+Requirements:
+CMAKE,
+GCC(C++) or any c++11 compatible compiler,
+boost libraries(program_options),
+Eigen3.
+
+How to build(Release):
+1.copy the source files to an empty directory
+2.mkdir Release
+3.cd Release
+# In the Release directory
+4.cmake -DCMAKE_BUILD_TYPE=Release ..
+# Change the Release to Debug for Debug build
+
+How to build it under Windows (untested):
+1.Install & configure: Eigen3 & boost
+2.use cmake or VS
\ No newline at end of file
diff --git a/model/rankaccu.cpp b/model/rankaccu.cpp
index 0f55e26..caa3c5a 100644
--- a/model/rankaccu.cpp
+++ b/model/rankaccu.cpp
@@ -102,6 +102,7 @@ void rank_accu(RidList &D,const vector<double> pred)
             AP += ((double)C[k])/(k-j);
         AP=AP*2/(i-j)-1;
         accu_AP+=AP;
+        LOG(INFO)<<"qid:"<<D.getQid(j)<<"; nDCG:"<<Y/Z<<"; AP:"<< AP;
     }
     LOG(INFO)<<"over "<< D.getuSize()<< " queries. "<<"Average nDGC: "<< accu_nDCG/D.getuSize()<< " Average AP: "<<accu_AP/D.getuSize();
 }
@@ -127,6 +128,8 @@ void rank_CMC(RidList &D,const std::vector<double> pred,CMC & cmc) {
                 cmc.addEntry(k-j);
                 break; // account only for the first match;
             }
+        LOG(INFO)<<"top: "<< D.getO(pred_rank[j]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+1]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+2]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+3]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+4]%D.getqSize())->qid
+                 <<" "<< D.getO(pred_rank[j+5]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+6]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+7]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+8]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+9]%D.getqSize())->qid;
     }
 }
 
diff --git a/model/ranksvm.h b/model/ranksvm.h
index a17e3c9..a19ad63 100644
--- a/model/ranksvm.h
+++ b/model/ranksvm.h
@@ -27,8 +27,6 @@ protected:
 public:
     virtual int train(RidList &D)=0;
     virtual int predict(RidList &D,std::vector<double> &res)=0;
-    // TODO Not sure how to construct this
-    //  Possible solution: generate a nxn matrix each row contains the sorted list of ranker result.
     int saveModel(const std::string fname);
     static RSVM* loadModel(const std::string fname);
     virtual std::string getName()=0;
diff --git a/split.cpp b/split.cpp
index ec23af2..bcfac47 100644
--- a/split.cpp
+++ b/split.cpp
@@ -1,6 +1,10 @@
-//
-// Created by joe on 5/13/15.
-//
+/*
+ * split: helper program to split rid file
+ *  usage: ./split -h to see all options
+ *  support:
+ *      shuffling
+ *      splitting
+ */
 
 #include <iostream>
 #include <boost/program_options.hpp>
diff --git a/tools/dataProvider.h b/tools/dataProvider.h
index eed3079..891be86 100644
--- a/tools/dataProvider.h
+++ b/tools/dataProvider.h
@@ -6,15 +6,12 @@
 #include<vector>
 #include<math.h>
 
-// TODO decide how to construct training data
-// One possible way for training data:
-//  Matrix composed of an array of feature vectors
-//  Labels are composed of linked list, such as
-//      6,3,4,0,5,0,0
-//      =>  0->6 | 1->3 | 2->4->5
-//  How to compensate for non exhaustive labeling?
-//      Use -1 to indicate not yet labeled data
-//      -1s will be excluded from training
+// Training data(Rid):
+//  First line: the total number of features(fsize)
+//  From the second line to the second-to-last line:
+//      nametag(string) followed by fsize feature values for that nametag
+//          a nametag of "-1" means "don't care" (only partially implemented, unverified)
+//  Last line (delimiter that terminates the data read): 0
 
 typedef struct DataEntry{
     std::string qid;
@@ -70,7 +67,6 @@ public:
         all.clear();
     }
     void setfSize(int fsize){n=fsize;}
-    inline int getfSize(){return n;}
     void addEntry(DataEntry* d){
         int ext=false;
         all.push_back(d);
@@ -92,6 +88,8 @@ public:
             d->rank=uniq.size()-1;
         }
     }
+    // A lot of getters
+    inline int getfSize(){return n;}
     inline DataEntry* getU(int x)
     {
         return uniq[x];
@@ -182,9 +180,11 @@ public:
         }
         return res;
     }
+    // master cal -> possible multiplexing
     inline double cal(Eigen::VectorXd *id,Eigen::VectorXd *oth,int i) {
         return fabs((*id)[i] - (*oth)[i]);
     }
+    // TODO getvec as VectorXd -> deprecating due to performance issues
     inline Eigen::VectorXd getVec(int x){
         int a,b,q=getqSize();
         a=x/q;
@@ -207,6 +207,7 @@ public:
             res(i)=cal(id,oth,i);
         return res;
     };
+    // w*Vec -> linear factor
     inline double getVecDot(int x,const Eigen::VectorXd &w)
     {
         int a,b,q=getqSize();
@@ -230,6 +231,7 @@ public:
             res += cal(id,oth,i)*w[i];
         return res;
     }
+    // X += w*Vec -> accumulate Vec scaled by the scalar w into X
     inline void addVecw(int x,double w,Eigen::VectorXd &X)
     {
         int a,b,q=getqSize();
@@ -251,6 +253,7 @@ public:
         for (int i=0;i<n;++i)
             X[i] += cal(id,oth,i)*w;
     }
+    // get label of vector x
     inline double getL(int x){
         int a,b,q=getqSize();
         a=x/q;
diff --git a/tools/fileDataProvider.cpp b/tools/fileDataProvider.cpp
index 2b52dc7..a0cbf9a 100644
--- a/tools/fileDataProvider.cpp
+++ b/tools/fileDataProvider.cpp
@@ -8,6 +8,7 @@
 
 using namespace std;
 
+// Random generator
 mt19937 gen;
 
 int FileDP::getDataSet(DataList &out){
diff --git a/tools/fileDataProvider.h b/tools/fileDataProvider.h
index 0ab1948..1c40d6e 100644
--- a/tools/fileDataProvider.h
+++ b/tools/fileDataProvider.h
@@ -1,3 +1,6 @@
+// File Data Provider
+// Any kind of dataprovider that reads from file
+
 #ifndef FDPROV_H
 #define FDPROV_H
 
@@ -8,7 +11,7 @@
 #include <fstream>
 
 // Rank qid features
-
+// Deprecated due to algorithm update
 class FileDP:public DataProvider
 {
 private:
@@ -23,7 +26,6 @@ public:
 };
 
 // label features
-
 class RidFileDP:public DataProvider
 {
 private:
diff --git a/tools/matrixIO.h b/tools/matrixIO.h
index 88cd419..ea5f85f 100644
--- a/tools/matrixIO.h
+++ b/tools/matrixIO.h
@@ -1,3 +1,5 @@
+// Helpers to read & write matrix data
+
 #ifndef MATIO_H
 #define MATIO_H
 
diff --git a/train.cpp b/train.cpp
index 83d8cdc..4e2146b 100644
--- a/train.cpp
+++ b/train.cpp
@@ -1,3 +1,24 @@
+/*
+ * ranksvm: main program
+ *  usage: ./ranksvm -h to see all options
+ *  support:
+ *      training
+ *      validating
+ *      predicting
+ *  model:
+ *      TN  RankSVM(truncated newton, conjugate gradient, various opt)
+ *      BH  bhat-dist
+ *      HE  Hell-dist(but output chance instead?!)
+ *  out features:
+ *      cmc
+ *          Cumulative Matching Characteristic
+ *      avg
+ *          Normalized avg rank
+ *      predict
+ *          image pair relevance value
+ */
+
+
 #include <iostream>
 #include <Eigen/Dense>
 #include <boost/program_options.hpp>
@@ -190,7 +211,7 @@ int main(int argc, char **argv) {
     else return 0;
     DataProvider* dp;
     if (vm["feature"].as<string>().find(".rid") == string::npos)
-        LOG(FATAL)<<"Format not supported";
+        LOG(FATAL)<<"Format no longer supported";
     else
     {
         RidFileDP* tmpdp = new RidFileDP(vm["feature"].as<string>());
-- 
cgit v1.2.3-70-g09d2