From 60881b380b02637c27497c4508faf2345a534679 Mon Sep 17 00:00:00 2001 From: Joe Zhao Date: Sat, 27 Jun 2015 20:42:23 +0800 Subject: comments & readme --- CMakeLists.txt | 1 - README | 24 ++++++++++++++++++++++++ model/rankaccu.cpp | 3 +++ model/ranksvm.h | 2 -- split.cpp | 10 +++++++--- tools/dataProvider.h | 23 +++++++++++++---------- tools/fileDataProvider.cpp | 1 + tools/fileDataProvider.h | 6 ++++-- tools/matrixIO.h | 2 ++ train.cpp | 23 ++++++++++++++++++++++- 10 files changed, 76 insertions(+), 19 deletions(-) create mode 100644 README diff --git a/CMakeLists.txt b/CMakeLists.txt index 97d548e..82aa7f4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,5 @@ INCLUDE_DIRECTORIES( ${Boost_INCLUDE_DIR}) set(SOURCE_FILES model/ranksvm.cpp model/ranksvmtn.cpp model/rankaccu.cpp tools/fileDataProvider.cpp) add_executable(ranksvm train.cpp ${SOURCE_FILES} model/rankaccu.h model/ranksvm.h model/ranksvmtn.h tools/dataProvider.h tools/matrixIO.h tools/fileDataProvider.h tools/dataProvider.cpp model/rankmisc.h) add_executable(split split.cpp ${SOURCE_FILES} tools/dataProvider.cpp) -add_dependencies(ranksvm split) TARGET_LINK_LIBRARIES( ranksvm ${Boost_LIBRARIES} ) TARGET_LINK_LIBRARIES( split ${Boost_LIBRARIES}) \ No newline at end of file diff --git a/README b/README new file mode 100644 index 0000000..54d8f50 --- /dev/null +++ b/README @@ -0,0 +1,24 @@ +RankSVM Training, validating, & predicting +========================================== + +Acknowledgement: +Logging is provided by easylogging++ +up-to-date site: https://github.com/easylogging/easyloggingpp + +Requirements: +CMAKE, +GCC(C++) or any c++11 compatible compiler, +boost libraries(program_options), +Eigen3. + +How to build(Release): +1.copy the source files to an empty directory +2.mkdir Release +3.cd Release +# In the Release directory +4.cmake -DCMAKE_BUILD_TYPE=Release .. 
+# Change the Release to Debug for Debug build + +How to build it under windows(un-tested) +1.Install & configure: Eigen3 & boost +2.use cmake or VS \ No newline at end of file diff --git a/model/rankaccu.cpp b/model/rankaccu.cpp index 0f55e26..caa3c5a 100644 --- a/model/rankaccu.cpp +++ b/model/rankaccu.cpp @@ -102,6 +102,7 @@ void rank_accu(RidList &D,const vector pred) AP += ((double)C[k])/(k-j); AP=AP*2/(i-j)-1; accu_AP+=AP; + LOG(INFO)<<"qid:"< pred,CMC & cmc) { cmc.addEntry(k-j); break; // account only for the first match; } + LOG(INFO)<<"top: "<< D.getO(pred_rank[j]%D.getqSize())->qid <<" "<qid <<" "<qid <<" "<qid <<" "<qid + <<" "<< D.getO(pred_rank[j+5]%D.getqSize())->qid <<" "<qid <<" "<qid <<" "<qid <<" "<qid; } } diff --git a/model/ranksvm.h b/model/ranksvm.h index a17e3c9..a19ad63 100644 --- a/model/ranksvm.h +++ b/model/ranksvm.h @@ -27,8 +27,6 @@ protected: public: virtual int train(RidList &D)=0; virtual int predict(RidList &D,std::vector &res)=0; - // TODO Not sure how to construct this - // Possible solution: generate a nxn matrix each row contains the sorted list of ranker result. int saveModel(const std::string fname); static RSVM* loadModel(const std::string fname); virtual std::string getName()=0; diff --git a/split.cpp b/split.cpp index ec23af2..bcfac47 100644 --- a/split.cpp +++ b/split.cpp @@ -1,6 +1,10 @@ -// -// Created by joe on 5/13/15. 
-// +/* + * split: helper program to split rid file + * usage: ./split -h to see all options + * support: + * shuffling + * splitting + */ #include #include diff --git a/tools/dataProvider.h b/tools/dataProvider.h index eed3079..891be86 100644 --- a/tools/dataProvider.h +++ b/tools/dataProvider.h @@ -6,15 +6,12 @@ #include #include -// TODO decide how to construct training data -// One possible way for training data: -// Matrix composed of an array of feature vectors -// Labels are composed of linked list, such as -// 6,3,4,0,5,0,0 -// => 0->6 | 1->3 | 2->4->5 -// How to compensate for non exhaustive labeling? -// Use -1 to indicate not yet labeled data -// -1s will be excluded from training +// Training data (Rid) file format: +// First line: the total number of features (fsize) +// From the second line to the second-to-last line: +// nametag (string) followed by fsize feature values for that nametag +// possibly: a "-1" nametag means don't care (partially implemented, not sure) +// Last line (delimiter that terminates the data read): 0 typedef struct DataEntry{ std::string qid; @@ -70,7 +67,6 @@ public: all.clear(); } void setfSize(int fsize){n=fsize;} - inline int getfSize(){return n;} void addEntry(DataEntry* d){ int ext=false; all.push_back(d); @@ -92,6 +88,8 @@ public: d->rank=uniq.size()-1; } } + // A lot of getters + inline int getfSize(){return n;} inline DataEntry* getU(int x) { return uniq[x]; @@ -182,9 +180,11 @@ public: } return res; } + // master cal -> possible multiplexing inline double cal(Eigen::VectorXd *id,Eigen::VectorXd *oth,int i) { return fabs((*id)[i] - (*oth)[i]); } + // TODO getvec as VectorXd -> deprecating due to performance issues inline Eigen::VectorXd getVec(int x){ int a,b,q=getqSize(); a=x/q; @@ -207,6 +207,7 @@ public: res(i)=cal(id,oth,i); return res; }; + // w*Vec -> linear factor inline double getVecDot(int x,const Eigen::VectorXd &w) { int a,b,q=getqSize(); @@ -230,6 +231,7 @@ public: res += cal(id,oth,i)*w[i]; return res; } + // w*Vec -> linear factor inline void addVecw(int 
x,double w,Eigen::VectorXd &X) { int a,b,q=getqSize(); @@ -251,6 +253,7 @@ public: for (int i=0;i // Rank qid features - +// Deprecated due to algorithm update class FileDP:public DataProvider { private: @@ -23,7 +26,6 @@ public: }; // label features - class RidFileDP:public DataProvider { private: diff --git a/tools/matrixIO.h b/tools/matrixIO.h index 88cd419..ea5f85f 100644 --- a/tools/matrixIO.h +++ b/tools/matrixIO.h @@ -1,3 +1,5 @@ +// Helpers to read & write matrix data + #ifndef MATIO_H #define MATIO_H diff --git a/train.cpp b/train.cpp index 83d8cdc..4e2146b 100644 --- a/train.cpp +++ b/train.cpp @@ -1,3 +1,24 @@ +/* + * ranksvm: main program + * usage: ./ranksvm -h to see all options + * support: + * training + * validating + * predicting + * model: + * TN RankSVM (truncated Newton, conjugate gradient, various optimizations) + * BH bhat-dist + * HE Hell-dist (but outputs chance instead?!) + * out features: + * cmc + * Cumulative Matching Characteristic + * avg + * Normalized avg rank + * predict + * image pair relevance value + */ + + #include #include #include @@ -190,7 +211,7 @@ int main(int argc, char **argv) { else return 0; DataProvider* dp; if (vm["feature"].as().find(".rid") == string::npos) - LOG(FATAL)<<"Format not supported"; + LOG(FATAL)<<"Format no longer supported"; else { RidFileDP* tmpdp = new RidFileDP(vm["feature"].as()); -- cgit v1.2.3-70-g09d2