summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoe Zhao <ztuowen@gmail.com>2015-06-27 20:42:23 +0800
committerJoe Zhao <ztuowen@gmail.com>2015-06-27 20:42:23 +0800
commit60881b380b02637c27497c4508faf2345a534679 (patch)
tree7813e01519a4ee8e8523b8d9445b94ae91afed38
parent76193f75ae34aa587bd87bed17a4b92eec8c6203 (diff)
downloadranksvm-60881b380b02637c27497c4508faf2345a534679.tar.gz
ranksvm-60881b380b02637c27497c4508faf2345a534679.tar.bz2
ranksvm-60881b380b02637c27497c4508faf2345a534679.zip
comments & readme
-rw-r--r--CMakeLists.txt1
-rw-r--r--README24
-rw-r--r--model/rankaccu.cpp3
-rw-r--r--model/ranksvm.h2
-rw-r--r--split.cpp10
-rw-r--r--tools/dataProvider.h23
-rw-r--r--tools/fileDataProvider.cpp1
-rw-r--r--tools/fileDataProvider.h6
-rw-r--r--tools/matrixIO.h2
-rw-r--r--train.cpp23
10 files changed, 76 insertions, 19 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 97d548e..82aa7f4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -14,6 +14,5 @@ INCLUDE_DIRECTORIES( ${Boost_INCLUDE_DIR})
set(SOURCE_FILES model/ranksvm.cpp model/ranksvmtn.cpp model/rankaccu.cpp tools/fileDataProvider.cpp)
add_executable(ranksvm train.cpp ${SOURCE_FILES} model/rankaccu.h model/ranksvm.h model/ranksvmtn.h tools/dataProvider.h tools/matrixIO.h tools/fileDataProvider.h tools/dataProvider.cpp model/rankmisc.h)
add_executable(split split.cpp ${SOURCE_FILES} tools/dataProvider.cpp)
-add_dependencies(ranksvm split)
TARGET_LINK_LIBRARIES( ranksvm ${Boost_LIBRARIES} )
TARGET_LINK_LIBRARIES( split ${Boost_LIBRARIES}) \ No newline at end of file
diff --git a/README b/README
new file mode 100644
index 0000000..54d8f50
--- /dev/null
+++ b/README
@@ -0,0 +1,24 @@
+RankSVM Training, validating, & predicting
+==========================================
+
+Acknowledgement:
+Logging is provided by easylogging++
+up-to-date site: https://github.com/easylogging/easyloggingpp
+
+Requirements:
+CMake,
+GCC (g++) or any C++11-compatible compiler,
+Boost libraries (program_options),
+Eigen3.
+
+How to build(Release):
+1.copy the source files to an empty directory
+2.mkdir Release
+3.cd Release
+# In the Release directory
+4.cmake -DCMAKE_BUILD_TYPE=Release ..
+# Change the Release to Debug for Debug build
+
+How to build under Windows (untested):
+1.Install & configure: Eigen3 & boost
+2.use cmake or VS \ No newline at end of file
diff --git a/model/rankaccu.cpp b/model/rankaccu.cpp
index 0f55e26..caa3c5a 100644
--- a/model/rankaccu.cpp
+++ b/model/rankaccu.cpp
@@ -102,6 +102,7 @@ void rank_accu(RidList &D,const vector<double> pred)
AP += ((double)C[k])/(k-j);
AP=AP*2/(i-j)-1;
accu_AP+=AP;
+ LOG(INFO)<<"qid:"<<D.getQid(j)<<"; nDCG:"<<Y/Z<<"; AP:"<< AP;
}
LOG(INFO)<<"over "<< D.getuSize()<< " queries. "<<"Average nDGC: "<< accu_nDCG/D.getuSize()<< " Average AP: "<<accu_AP/D.getuSize();
}
@@ -127,6 +128,8 @@ void rank_CMC(RidList &D,const std::vector<double> pred,CMC & cmc) {
cmc.addEntry(k-j);
break; // account only for the first match;
}
+ LOG(INFO)<<"top: "<< D.getO(pred_rank[j]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+1]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+2]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+3]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+4]%D.getqSize())->qid
+ <<" "<< D.getO(pred_rank[j+5]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+6]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+7]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+8]%D.getqSize())->qid <<" "<<D.getO(pred_rank[j+9]%D.getqSize())->qid;
}
}
diff --git a/model/ranksvm.h b/model/ranksvm.h
index a17e3c9..a19ad63 100644
--- a/model/ranksvm.h
+++ b/model/ranksvm.h
@@ -27,8 +27,6 @@ protected:
public:
virtual int train(RidList &D)=0;
virtual int predict(RidList &D,std::vector<double> &res)=0;
- // TODO Not sure how to construct this
- // Possible solution: generate a nxn matrix each row contains the sorted list of ranker result.
int saveModel(const std::string fname);
static RSVM* loadModel(const std::string fname);
virtual std::string getName()=0;
diff --git a/split.cpp b/split.cpp
index ec23af2..bcfac47 100644
--- a/split.cpp
+++ b/split.cpp
@@ -1,6 +1,10 @@
-//
-// Created by joe on 5/13/15.
-//
+/*
+ * split: helper program to split rid file
+ * usage: ./split -h to see all options
+ * support:
+ * shuffling
+ * splitting
+ */
#include <iostream>
#include <boost/program_options.hpp>
diff --git a/tools/dataProvider.h b/tools/dataProvider.h
index eed3079..891be86 100644
--- a/tools/dataProvider.h
+++ b/tools/dataProvider.h
@@ -6,15 +6,12 @@
#include<vector>
#include<math.h>
-// TODO decide how to construct training data
-// One possible way for training data:
-// Matrix composed of an array of feature vectors
-// Labels are composed of linked list, such as
-// 6,3,4,0,5,0,0
-// => 0->6 | 1->3 | 2->4->5
-// How to compensate for non exhaustive labeling?
-// Use -1 to indicate not yet labeled data
-// -1s will be excluded from training
+// Training data (Rid) file format:
+// First line: the total number of features (fsize)
+// Second line through the second-to-last line:
+// nametag(string) followed by fsize feature values for that nametag
+// a nametag of "-1" means "don't care" (partially implemented, not fully verified)
+// Last line (delimiter that terminates the data read): 0
typedef struct DataEntry{
std::string qid;
@@ -70,7 +67,6 @@ public:
all.clear();
}
void setfSize(int fsize){n=fsize;}
- inline int getfSize(){return n;}
void addEntry(DataEntry* d){
int ext=false;
all.push_back(d);
@@ -92,6 +88,8 @@ public:
d->rank=uniq.size()-1;
}
}
+ // A lot of getters
+ inline int getfSize(){return n;}
inline DataEntry* getU(int x)
{
return uniq[x];
@@ -182,9 +180,11 @@ public:
}
return res;
}
+ // master cal -> possible multiplexing
inline double cal(Eigen::VectorXd *id,Eigen::VectorXd *oth,int i) {
return fabs((*id)[i] - (*oth)[i]);
}
+ // TODO getvec as VectorXd -> deprecating due to performance issues
inline Eigen::VectorXd getVec(int x){
int a,b,q=getqSize();
a=x/q;
@@ -207,6 +207,7 @@ public:
res(i)=cal(id,oth,i);
return res;
};
+ // w*Vec -> linear factor
inline double getVecDot(int x,const Eigen::VectorXd &w)
{
int a,b,q=getqSize();
@@ -230,6 +231,7 @@ public:
res += cal(id,oth,i)*w[i];
return res;
}
+ // X += w*Vec -> accumulate the vector for x, scaled by w, into X
inline void addVecw(int x,double w,Eigen::VectorXd &X)
{
int a,b,q=getqSize();
@@ -251,6 +253,7 @@ public:
for (int i=0;i<n;++i)
X[i] += cal(id,oth,i)*w;
}
+ // get label of vector x
inline double getL(int x){
int a,b,q=getqSize();
a=x/q;
diff --git a/tools/fileDataProvider.cpp b/tools/fileDataProvider.cpp
index 2b52dc7..a0cbf9a 100644
--- a/tools/fileDataProvider.cpp
+++ b/tools/fileDataProvider.cpp
@@ -8,6 +8,7 @@
using namespace std;
+// Random generator
mt19937 gen;
int FileDP::getDataSet(DataList &out){
diff --git a/tools/fileDataProvider.h b/tools/fileDataProvider.h
index 0ab1948..1c40d6e 100644
--- a/tools/fileDataProvider.h
+++ b/tools/fileDataProvider.h
@@ -1,3 +1,6 @@
+// File Data Provider
+// Any kind of dataprovider that reads from file
+
#ifndef FDPROV_H
#define FDPROV_H
@@ -8,7 +11,7 @@
#include <fstream>
// Rank qid features
-
+// Deprecated due to algorithm update
class FileDP:public DataProvider
{
private:
@@ -23,7 +26,6 @@ public:
};
// label features
-
class RidFileDP:public DataProvider
{
private:
diff --git a/tools/matrixIO.h b/tools/matrixIO.h
index 88cd419..ea5f85f 100644
--- a/tools/matrixIO.h
+++ b/tools/matrixIO.h
@@ -1,3 +1,5 @@
+// Helpers for reading and writing matrix data
+
#ifndef MATIO_H
#define MATIO_H
diff --git a/train.cpp b/train.cpp
index 83d8cdc..4e2146b 100644
--- a/train.cpp
+++ b/train.cpp
@@ -1,3 +1,24 @@
+/*
+ * ranksvm: main program
+ * usage: ./ranksvm -h to see all options
+ * support:
+ * training
+ * validating
+ * predicting
+ * model:
+ * TN RankSVM(truncated newton, conjugate gradient, various opt)
+ * BH Bhattacharyya distance
+ * HE Hellinger distance (but outputs chance instead?!)
+ * out features:
+ * cmc
+ * Cumulative Matching Characteristic
+ * avg
+ * Normalized avg rank
+ * predict
+ * image pair relevance value
+ */
+
+
#include <iostream>
#include <Eigen/Dense>
#include <boost/program_options.hpp>
@@ -190,7 +211,7 @@ int main(int argc, char **argv) {
else return 0;
DataProvider* dp;
if (vm["feature"].as<string>().find(".rid") == string::npos)
- LOG(FATAL)<<"Format not supported";
+ LOG(FATAL)<<"Format no longer supported";
else
{
RidFileDP* tmpdp = new RidFileDP(vm["feature"].as<string>());