diff options
author | Joe Zhao <ztuowen@gmail.com> | 2015-06-27 20:42:23 +0800 |
---|---|---|
committer | Joe Zhao <ztuowen@gmail.com> | 2015-06-27 20:42:23 +0800 |
commit | 60881b380b02637c27497c4508faf2345a534679 (patch) | |
tree | 7813e01519a4ee8e8523b8d9445b94ae91afed38 /tools | |
parent | 76193f75ae34aa587bd87bed17a4b92eec8c6203 (diff) | |
download | ranksvm-60881b380b02637c27497c4508faf2345a534679.tar.gz ranksvm-60881b380b02637c27497c4508faf2345a534679.tar.bz2 ranksvm-60881b380b02637c27497c4508faf2345a534679.zip |
comments & readme
Diffstat (limited to 'tools')
-rw-r--r-- | tools/dataProvider.h | 23 | ||||
-rw-r--r-- | tools/fileDataProvider.cpp | 1 | ||||
-rw-r--r-- | tools/fileDataProvider.h | 6 | ||||
-rw-r--r-- | tools/matrixIO.h | 2 |
4 files changed, 20 insertions, 12 deletions
```diff
diff --git a/tools/dataProvider.h b/tools/dataProvider.h
index eed3079..891be86 100644
--- a/tools/dataProvider.h
+++ b/tools/dataProvider.h
@@ -6,15 +6,12 @@
 #include<vector>
 #include<math.h>
 
-// TODO decide how to construct training data
-// One possible way for training data:
-// Matrix composed of an array of feature vectors
-// Labels are composed of linked list, such as
-// 6,3,4,0,5,0,0
-// => 0->6 | 1->3 | 2->4->5
-// How to compensate for non exhaustive labeling?
-// Use -1 to indicate not yet labeled data
-// -1s will be excluded from training
+// Training data(Rid):
+// First line: the total number of features(fsize)
+// from the second line to last-1
+// nametag(string) fsize number of features for nametag
+// possible: "-1" nametag mean don't care(partially implemented, not sure)
+// last line(delimiter to terminate data read): 0
 
 typedef struct DataEntry{
     std::string qid;
@@ -70,7 +67,6 @@ public:
         all.clear();
     }
     void setfSize(int fsize){n=fsize;}
-    inline int getfSize(){return n;}
     void addEntry(DataEntry* d){
         int ext=false;
         all.push_back(d);
@@ -92,6 +88,8 @@ public:
             d->rank=uniq.size()-1;
         }
     }
+    // A lot of getters
+    inline int getfSize(){return n;}
     inline DataEntry* getU(int x)
     {
         return uniq[x];
@@ -182,9 +180,11 @@ public:
         }
         return res;
     }
+    // master cal -> possible multiplexing
     inline double cal(Eigen::VectorXd *id,Eigen::VectorXd *oth,int i) {
         return fabs((*id)[i] - (*oth)[i]);
     }
+    // TODO getvec as VectorXd -> deprecating due to performance issues
     inline Eigen::VectorXd getVec(int x){
         int a,b,q=getqSize();
         a=x/q;
@@ -207,6 +207,7 @@ public:
             res(i)=cal(id,oth,i);
         return res;
     };
+    // w*Vec -> linear factor
     inline double getVecDot(int x,const Eigen::VectorXd &w)
     {
         int a,b,q=getqSize();
@@ -230,6 +231,7 @@ public:
             res += cal(id,oth,i)*w[i];
         return res;
     }
+    // w*Vec -> linear factor
     inline void addVecw(int x,double w,Eigen::VectorXd &X)
     {
         int a,b,q=getqSize();
@@ -251,6 +253,7 @@ public:
         for (int i=0;i<n;++i)
             X[i] += cal(id,oth,i)*w;
     }
+    // get label of vector x
     inline double getL(int x){
         int a,b,q=getqSize();
         a=x/q;
diff --git a/tools/fileDataProvider.cpp b/tools/fileDataProvider.cpp
index 2b52dc7..a0cbf9a 100644
--- a/tools/fileDataProvider.cpp
+++ b/tools/fileDataProvider.cpp
@@ -8,6 +8,7 @@
 
 using namespace std;
 
+// Random generator
 mt19937 gen;
 
 int FileDP::getDataSet(DataList &out){
diff --git a/tools/fileDataProvider.h b/tools/fileDataProvider.h
index 0ab1948..1c40d6e 100644
--- a/tools/fileDataProvider.h
+++ b/tools/fileDataProvider.h
@@ -1,3 +1,6 @@
+// File Data Provider
+// Any kind of dataprovider that reads from file
+
 #ifndef FDPROV_H
 #define FDPROV_H
 
@@ -8,7 +11,7 @@
 #include <fstream>
 
 // Rank qid features
-
+// Deprecated due to algorithm update
 class FileDP:public DataProvider
 {
 private:
@@ -23,7 +26,6 @@ public:
 };
 
 // label features
-
 class RidFileDP:public DataProvider
 {
 private:
diff --git a/tools/matrixIO.h b/tools/matrixIO.h
index 88cd419..ea5f85f 100644
--- a/tools/matrixIO.h
+++ b/tools/matrixIO.h
@@ -1,3 +1,5 @@
+// Some helper to read&write matrices data
+
 #ifndef MATIO_H
 #define MATIO_H
 
```