summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Joe Zhao <ztuowen@gmail.com> 2015-04-12 10:59:08 +0800
committer Joe Zhao <ztuowen@gmail.com> 2015-04-12 10:59:08 +0800
commit 4662779251de3b692c20d4e10980a795f04e7520 (patch)
tree 9c73cb40236f3c8134f465a5eccbab0837d199df
parent 6c77acb550288883c25b3c2a769313d5466dda70 (diff)
download ranksvm-4662779251de3b692c20d4e10980a795f04e7520.tar.gz
ranksvm-4662779251de3b692c20d4e10980a795f04e7520.tar.bz2
ranksvm-4662779251de3b692c20d4e10980a795f04e7520.zip
validate, nDCG
-rw-r--r-- CMakeLists.txt 4
-rw-r--r-- main.cpp 44
-rw-r--r-- model/rankaccu.cpp 69
-rw-r--r-- model/rankaccu.h 13
-rw-r--r-- tools/dataProvider.h 2
5 files changed, 109 insertions, 23 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e1eb353..3d8a4e3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,6 +11,6 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
FIND_PACKAGE( Boost COMPONENTS program_options REQUIRED )
INCLUDE_DIRECTORIES( ${Boost_INCLUDE_DIR})
-set(SOURCE_FILES main.cpp ./model/ranksvm.cpp ./model/ranksvmtn.cpp)
-add_executable(ranksvm ${SOURCE_FILES})
+set(SOURCE_FILES main.cpp ./model/ranksvm.cpp ./model/ranksvmtn.cpp ./model/rankaccu.cpp)
+add_executable(ranksvm ${SOURCE_FILES} model/rankaccu.h model/rankaccu.cpp)
TARGET_LINK_LIBRARIES( ranksvm ${Boost_LIBRARIES} )
\ No newline at end of file
diff --git a/main.cpp b/main.cpp
index eeb6b99..0c71b07 100644
--- a/main.cpp
+++ b/main.cpp
@@ -5,8 +5,7 @@
#include "tools/easylogging++.h"
#include "model/ranksvmtn.h"
#include "tools/fileDataProvider.h"
-#include "tools/matrixIO.h"
-#include <fstream>
+#include "model/rankaccu.h"
INITIALIZE_EASYLOGGINGPP
@@ -28,6 +27,10 @@ int train() {
dp.getDataSet(D);
LOG(INFO)<<"Read "<<D.getSize()<<" entries with "<< D.getfSize()<<" features";
rsvm->train(D);
+ std::vector<double> L;
+ rsvm->predict(D,L);
+
+ rank_accu(D,L);
LOG(INFO)<<"Training finished,saving model";
@@ -54,24 +57,31 @@ int predict() {
rsvm->predict(D,L);
}
- LOG(INFO)<<"Finished,saving prediction";
- std::ofstream fout(vm["output"].as<std::string>().c_str());
+ if (vm.count("validate"))
+ {
+ rank_accu(D,L);
+ }
- for (int i=0; i<L.size();++i)
- fout<<L[i]<<std::endl;
- fout.close();
+ if (vm.count("output"))
+ {
+ LOG(INFO)<<"Finished,saving prediction";
+ std::ofstream fout(vm["output"].as<std::string>().c_str());
+ for (int i=0; i<L.size();++i)
+ fout<<L[i]<<std::endl;
+ fout.close();
+ }
+ else if (!vm.count("validate"))
+ {
+ LOG(INFO)<<"Finished";
+ for (int i=0; i<L.size();++i)
+ std::cout<<L[i]<<std::endl;
+ }
dp.close();
delete rsvm;
return 0;
}
-int validate()
-{
- LOG(FATAL)<<"Not Implemented";
- return 0;
-}
-
int main(int argc, char **argv) {
// Defining program options
po::options_description desc("Allowed options");
@@ -95,15 +105,9 @@ int main(int argc, char **argv) {
}
if (vm.count("train")) {
- LOG(INFO) << "Program option: training";
train();
}
- else if (vm.count("validate")) {
- LOG(INFO) << "Program option: validate";
- validate();
- }
- else if (vm.count("predict")) {
- LOG(INFO) << "Program option: predict";
+ else if (vm.count("validate")||vm.count("predict")) {
predict();
}
return 0;
diff --git a/model/rankaccu.cpp b/model/rankaccu.cpp
new file mode 100644
index 0000000..2e77eb6
--- /dev/null
+++ b/model/rankaccu.cpp
@@ -0,0 +1,69 @@
+//
+// Created by joe on 4/12/15.
+//
+
+#include "rankaccu.h"
+#include "../tools/easylogging++.h"
+
+using namespace std;
+
+const double offset = 1;
+
+void ranksort(int l,int r,vector<int> &rank,const vector<double> &ref1,const vector<double> &ref2)
+{
+ int i=l,j=r,k;
+ double mid1=ref1[rank[(l+r)>>1]],mid2=ref2[rank[(l+r)>>1]];
+ while (i<=j)
+ {
+ while (ref1[rank[i]]>mid1 || (ref1[rank[i]]==mid1 && ref2[rank[i]]>mid2)) ++i;
+ while (ref1[rank[j]]<mid1 || (ref1[rank[j]]==mid1 && ref2[rank[j]]<mid2)) --j;
+ if (i<=j)
+ {
+ k=rank[i];
+ rank[i]=rank[j];
+ rank[j]=k;
+ ++i;
+ --j;
+ }
+ }
+ if (j>l)
+ ranksort(l,j,rank,ref1,ref2);
+ if (i<r)
+ ranksort(i,r,rank,ref1,ref2);
+}
+
+int rank_accu(DataList &D,const vector<double> pred)
+{
+ unsigned long n = D.getSize();
+ vector<int> orig_rank(n),pred_rank(n);
+ vector<double> orig(n);
+ int i,j;
+ for (i=0;i<D.getSize();++i)
+ {
+ orig_rank[i]=i;
+ pred_rank[i]=i;
+ orig[i]=D.getData()[i]->rank;
+ }
+ int cnt=0;
+ double accu_nDCG=0;
+ i=j=0;
+ while (i<D.getSize())
+ {
+ if ((i+1 == D.getSize())|| D.getData()[i]->qid!=D.getData()[i+1]->qid)
+ {
+ double Y=0,Z=0;
+ ranksort(j,i,orig_rank,orig,pred);
+ ranksort(j,i,pred_rank,pred,orig);
+ for (int k = j;k<=i;++k)
+ {
+ Z += (pow(2,offset+orig[orig_rank[k]]) - 1)/log2(2+k-j);
+ Y += (pow(2,offset+orig[pred_rank[k]]) - 1)/log2(2+k-j);
+ }
+ accu_nDCG+=Y/Z;
+ j = i+1;
+ ++cnt;
+ }
+ ++i;
+ }
+ LOG(INFO)<<"Average nDGC over "<< cnt<< " queries: "<< accu_nDCG/cnt;
+}
\ No newline at end of file
diff --git a/model/rankaccu.h b/model/rankaccu.h
new file mode 100644
index 0000000..3fe5379
--- /dev/null
+++ b/model/rankaccu.h
@@ -0,0 +1,13 @@
+//
+// Created by joe on 4/12/15.
+//
+
+#ifndef RANKSVM_RANKACCU_H
+#define RANKSVM_RANKACCU_H
+
+#include<vector>
+#include"../tools/dataProvider.h"
+
+int rank_accu(DataList &D,const std::vector<double> pred);
+
+#endif //RANKSVM_RANKACCU_H
diff --git a/tools/dataProvider.h b/tools/dataProvider.h
index da3e1ee..2c3169a 100644
--- a/tools/dataProvider.h
+++ b/tools/dataProvider.h
@@ -35,7 +35,7 @@ public:
delete data[i];
data.clear();
}
- std::vector<DataEntry*>& getData(){
+ inline std::vector<DataEntry*>& getData(){
return data;
}
~DataList(){