diff options
| author | Joe Zhao <ztuowen@gmail.com> | 2015-04-12 10:59:08 +0800 | 
|---|---|---|
| committer | Joe Zhao <ztuowen@gmail.com> | 2015-04-12 10:59:08 +0800 | 
| commit | 4662779251de3b692c20d4e10980a795f04e7520 (patch) | |
| tree | 9c73cb40236f3c8134f465a5eccbab0837d199df | |
| parent | 6c77acb550288883c25b3c2a769313d5466dda70 (diff) | |
| download | ranksvm-4662779251de3b692c20d4e10980a795f04e7520.tar.gz ranksvm-4662779251de3b692c20d4e10980a795f04e7520.tar.bz2 ranksvm-4662779251de3b692c20d4e10980a795f04e7520.zip  | |
validate, nDCG
| -rw-r--r-- | CMakeLists.txt | 4 | ||||
| -rw-r--r-- | main.cpp | 44 | ||||
| -rw-r--r-- | model/rankaccu.cpp | 69 | ||||
| -rw-r--r-- | model/rankaccu.h | 13 | ||||
| -rw-r--r-- | tools/dataProvider.h | 2 | 
5 files changed, 109 insertions, 23 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index e1eb353..3d8a4e3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,6 +11,6 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")  FIND_PACKAGE( Boost COMPONENTS program_options REQUIRED )  INCLUDE_DIRECTORIES( ${Boost_INCLUDE_DIR}) -set(SOURCE_FILES main.cpp ./model/ranksvm.cpp ./model/ranksvmtn.cpp) -add_executable(ranksvm ${SOURCE_FILES}) +set(SOURCE_FILES main.cpp ./model/ranksvm.cpp ./model/ranksvmtn.cpp ./model/rankaccu.cpp) +add_executable(ranksvm ${SOURCE_FILES} model/rankaccu.h model/rankaccu.cpp)  TARGET_LINK_LIBRARIES( ranksvm ${Boost_LIBRARIES} )
\ No newline at end of file @@ -5,8 +5,7 @@  #include "tools/easylogging++.h"  #include "model/ranksvmtn.h"  #include "tools/fileDataProvider.h" -#include "tools/matrixIO.h" -#include <fstream> +#include "model/rankaccu.h"  INITIALIZE_EASYLOGGINGPP @@ -28,6 +27,10 @@ int train() {      dp.getDataSet(D);      LOG(INFO)<<"Read "<<D.getSize()<<" entries with "<< D.getfSize()<<" features";      rsvm->train(D); +    std::vector<double> L; +    rsvm->predict(D,L); + +    rank_accu(D,L);      LOG(INFO)<<"Training finished,saving model"; @@ -54,24 +57,31 @@ int predict() {          rsvm->predict(D,L);      } -    LOG(INFO)<<"Finished,saving prediction"; -    std::ofstream fout(vm["output"].as<std::string>().c_str()); +    if (vm.count("validate")) +    { +        rank_accu(D,L); +    } -    for (int i=0; i<L.size();++i) -        fout<<L[i]<<std::endl; -    fout.close(); +    if (vm.count("output")) +    { +        LOG(INFO)<<"Finished,saving prediction"; +        std::ofstream fout(vm["output"].as<std::string>().c_str()); +        for (int i=0; i<L.size();++i) +            fout<<L[i]<<std::endl; +        fout.close(); +    } +    else if (!vm.count("validate")) +    { +        LOG(INFO)<<"Finished"; +        for (int i=0; i<L.size();++i) +            std::cout<<L[i]<<std::endl; +    }      dp.close();      delete rsvm;      return 0;  } -int validate() -{ -    LOG(FATAL)<<"Not Implemented"; -    return 0; -} -  int main(int argc, char **argv) {      // Defining program options      po::options_description desc("Allowed options"); @@ -95,15 +105,9 @@ int main(int argc, char **argv) {      }      if (vm.count("train")) { -        LOG(INFO) << "Program option: training";          train();      } -    else if (vm.count("validate")) { -        LOG(INFO) << "Program option: validate"; -        validate(); -    } -    else if (vm.count("predict")) { -        LOG(INFO) << "Program option: predict"; +    else if (vm.count("validate")||vm.count("predict")) {          predict();      }      return 0; diff --git a/model/rankaccu.cpp b/model/rankaccu.cpp new file mode 100644 index 0000000..2e77eb6 --- /dev/null +++ b/model/rankaccu.cpp @@ -0,0 +1,69 @@ +// +// Created by joe on 4/12/15. +// + +#include "rankaccu.h" +#include "../tools/easylogging++.h" + +using namespace std; + +const double offset = 1; + +void ranksort(int l,int r,vector<int> &rank,const vector<double> &ref1,const vector<double> &ref2) +{ +    int i=l,j=r,k; +    double mid1=ref1[rank[(l+r)>>1]],mid2=ref2[rank[(l+r)>>1]]; +    while (i<=j) +    { +        while (ref1[rank[i]]>mid1 || (ref1[rank[i]]==mid1 && ref2[rank[i]]>mid2)) ++i; +        while (ref1[rank[j]]<mid1 || (ref1[rank[j]]==mid1 && ref2[rank[j]]<mid2)) --j; +        if (i<=j) +        { +            k=rank[i]; +            rank[i]=rank[j]; +            rank[j]=k; +            ++i; +            --j; +        } +    } +    if (j>l) +        ranksort(l,j,rank,ref1,ref2); +    if (i<r) +        ranksort(i,r,rank,ref1,ref2); +} + +int rank_accu(DataList &D,const vector<double> pred) +{ +    unsigned long n = D.getSize(); +    vector<int> orig_rank(n),pred_rank(n); +    vector<double> orig(n); +    int i,j; +    for (i=0;i<D.getSize();++i) +    { +        orig_rank[i]=i; +        pred_rank[i]=i; +        orig[i]=D.getData()[i]->rank; +    } +    int cnt=0; +    double accu_nDCG=0; +    i=j=0; +    while (i<D.getSize()) +    { +        if ((i+1 == D.getSize())|| D.getData()[i]->qid!=D.getData()[i+1]->qid) +        { +            double Y=0,Z=0; +            ranksort(j,i,orig_rank,orig,pred); +            ranksort(j,i,pred_rank,pred,orig); +            for (int k = j;k<=i;++k) +            { +                Z += (pow(2,offset+orig[orig_rank[k]]) - 1)/log2(2+k-j); +                Y += (pow(2,offset+orig[pred_rank[k]]) - 1)/log2(2+k-j); +            } +            accu_nDCG+=Y/Z; +            j = i+1; +            ++cnt; +        } +        ++i; +    } +    LOG(INFO)<<"Average nDGC over "<< cnt<< " queries: "<< accu_nDCG/cnt; +}
\ No newline at end of file diff --git a/model/rankaccu.h b/model/rankaccu.h new file mode 100644 index 0000000..3fe5379 --- /dev/null +++ b/model/rankaccu.h @@ -0,0 +1,13 @@ +// +// Created by joe on 4/12/15. +// + +#ifndef RANKSVM_RANKACCU_H +#define RANKSVM_RANKACCU_H + +#include<vector> +#include"../tools/dataProvider.h" + +int rank_accu(DataList &D,const std::vector<double> pred); + +#endif //RANKSVM_RANKACCU_H diff --git a/tools/dataProvider.h b/tools/dataProvider.h index da3e1ee..2c3169a 100644 --- a/tools/dataProvider.h +++ b/tools/dataProvider.h @@ -35,7 +35,7 @@ public:              delete data[i];          data.clear();      } -    std::vector<DataEntry*>& getData(){ +    inline std::vector<DataEntry*>& getData(){          return data;      }      ~DataList(){  | 
