From 4662779251de3b692c20d4e10980a795f04e7520 Mon Sep 17 00:00:00 2001 From: Joe Zhao Date: Sun, 12 Apr 2015 10:59:08 +0800 Subject: validate, nDCG --- CMakeLists.txt | 4 +-- main.cpp | 44 ++++++++++++++++++--------------- model/rankaccu.cpp | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++ model/rankaccu.h | 13 ++++++++++ tools/dataProvider.h | 2 +- 5 files changed, 109 insertions(+), 23 deletions(-) create mode 100644 model/rankaccu.cpp create mode 100644 model/rankaccu.h diff --git a/CMakeLists.txt b/CMakeLists.txt index e1eb353..3d8a4e3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,6 +11,6 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") FIND_PACKAGE( Boost COMPONENTS program_options REQUIRED ) INCLUDE_DIRECTORIES( ${Boost_INCLUDE_DIR}) -set(SOURCE_FILES main.cpp ./model/ranksvm.cpp ./model/ranksvmtn.cpp) -add_executable(ranksvm ${SOURCE_FILES}) +set(SOURCE_FILES main.cpp ./model/ranksvm.cpp ./model/ranksvmtn.cpp ./model/rankaccu.cpp) +add_executable(ranksvm ${SOURCE_FILES} model/rankaccu.h model/rankaccu.cpp) TARGET_LINK_LIBRARIES( ranksvm ${Boost_LIBRARIES} ) \ No newline at end of file diff --git a/main.cpp b/main.cpp index eeb6b99..0c71b07 100644 --- a/main.cpp +++ b/main.cpp @@ -5,8 +5,7 @@ #include "tools/easylogging++.h" #include "model/ranksvmtn.h" #include "tools/fileDataProvider.h" -#include "tools/matrixIO.h" -#include +#include "model/rankaccu.h" INITIALIZE_EASYLOGGINGPP @@ -28,6 +27,10 @@ int train() { dp.getDataSet(D); LOG(INFO)<<"Read "<train(D); + std::vector L; + rsvm->predict(D,L); + + rank_accu(D,L); LOG(INFO)<<"Training finished,saving model"; @@ -54,24 +57,31 @@ int predict() { rsvm->predict(D,L); } - LOG(INFO)<<"Finished,saving prediction"; - std::ofstream fout(vm["output"].as().c_str()); + if (vm.count("validate")) + { + rank_accu(D,L); + } - for (int i=0; i().c_str()); + for (int i=0; i &rank,const vector &ref1,const vector &ref2) +{ + int i=l,j=r,k; + double mid1=ref1[rank[(l+r)>>1]],mid2=ref2[rank[(l+r)>>1]]; + while (i<=j) + { + while (ref1[rank[i]]>mid1 || (ref1[rank[i]]==mid1 && ref2[rank[i]]>mid2)) ++i; + while (ref1[rank[j]]l) + ranksort(l,j,rank,ref1,ref2); + if (i pred) +{ + unsigned long n = D.getSize(); + vector orig_rank(n),pred_rank(n); + vector orig(n); + int i,j; + for (i=0;irank; + } + int cnt=0; + double accu_nDCG=0; + i=j=0; + while (iqid!=D.getData()[i+1]->qid) + { + double Y=0,Z=0; + ranksort(j,i,orig_rank,orig,pred); + ranksort(j,i,pred_rank,pred,orig); + for (int k = j;k<=i;++k) + { + Z += (pow(2,offset+orig[orig_rank[k]]) - 1)/log2(2+k-j); + Y += (pow(2,offset+orig[pred_rank[k]]) - 1)/log2(2+k-j); + } + accu_nDCG+=Y/Z; + j = i+1; + ++cnt; + } + ++i; + } + LOG(INFO)<<"Average nDGC over "<< cnt<< " queries: "<< accu_nDCG/cnt; +} \ No newline at end of file diff --git a/model/rankaccu.h b/model/rankaccu.h new file mode 100644 index 0000000..3fe5379 --- /dev/null +++ b/model/rankaccu.h @@ -0,0 +1,13 @@ +// +// Created by joe on 4/12/15. +// + +#ifndef RANKSVM_RANKACCU_H +#define RANKSVM_RANKACCU_H + +#include +#include"../tools/dataProvider.h" + +int rank_accu(DataList &D,const std::vector pred); + +#endif //RANKSVM_RANKACCU_H diff --git a/tools/dataProvider.h b/tools/dataProvider.h index da3e1ee..2c3169a 100644 --- a/tools/dataProvider.h +++ b/tools/dataProvider.h @@ -35,7 +35,7 @@ public: delete data[i]; data.clear(); } - std::vector& getData(){ + inline std::vector& getData(){ return data; } ~DataList(){ -- cgit v1.2.3-70-g09d2