From c7152b8843fadd0961ce8fe20c1eac6403f0275d Mon Sep 17 00:00:00 2001 From: Joe Zhao Date: Sun, 12 Apr 2015 11:30:01 +0800 Subject: validate AP --- main.cpp | 10 +++------- model/rankaccu.cpp | 50 ++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 51 insertions(+), 9 deletions(-) diff --git a/main.cpp b/main.cpp index 0c71b07..a517d88 100644 --- a/main.cpp +++ b/main.cpp @@ -19,7 +19,6 @@ int train() { rsvm = RSVM::loadModel(vm["model"].as()); FileDP dp(vm["feature"].as()); - // Generic training operations dp.open(); DataList D; @@ -50,12 +49,9 @@ int predict() { std::vector L; LOG(INFO)<<"Prediction started"; - while (!dp.EOFile()) - { - dp.getDataSet(D); - LOG(INFO)<<"Read "<predict(D,L); - } + dp.getDataSet(D); + LOG(INFO)<<"Read "<predict(D,L); if (vm.count("validate")) { diff --git a/model/rankaccu.cpp b/model/rankaccu.cpp index 2e77eb6..069e245 100644 --- a/model/rankaccu.cpp +++ b/model/rankaccu.cpp @@ -32,10 +32,49 @@ void ranksort(int l,int r,vector &rank,const vector &ref1,const vec ranksort(i,r,rank,ref1,ref2); } +void rankmerge(int l,int r,vector &C,vector &rank,const vector &ref1,const vector &ref2) +{ + if (l==r) + return; + int i=l,j=((l+r)>>1)+1; + rankmerge(i,j-1,C,rank,ref1,ref2); + rankmerge(j,r,C,rank,ref1,ref2); + vector stage_r(r-l+1),stage_c(r-l+1); + int cnt=0; + int k=0; + while (i<=((l+r)>>1) && j<=r) + { + if (ref1[rank[i]]>ref1[rank[j]] || (ref1[rank[i]]==ref1[rank[j]] && ref2[rank[i]]>ref2[rank[j]])) { + stage_c[k] = C[i]; + stage_r[k++] = rank[i++]; + ++cnt; + } + else{ + stage_c[k] = C[j]+cnt; + stage_r[k++] = rank[j++]; + } + } + while (i<=((l+r)>>1)) + { + stage_c[k] = C[i]; + stage_r[k++] = rank[i++]; + } + while (j<=r) + { + stage_c[k] = C[j]+cnt; + stage_r[k++] = rank[j++]; + } + for (i=l;i<=r;++i) + { + C[i]=stage_c[i-l]; + rank[i]=stage_r[i-l]; + } +} + int rank_accu(DataList &D,const vector pred) { unsigned long n = D.getSize(); - vector orig_rank(n),pred_rank(n); + vector orig_rank(n),pred_rank(n),C(n); vector orig(n); int i,j; for (i=0;i pred) } int cnt=0; double accu_nDCG=0; + double accu_AP=0; i=j=0; while (iqid!=D.getData()[i+1]->qid) { double Y=0,Z=0; + double AP=0; ranksort(j,i,orig_rank,orig,pred); ranksort(j,i,pred_rank,pred,orig); for (int k = j;k<=i;++k) @@ -60,10 +101,15 @@ int rank_accu(DataList &D,const vector pred) Y += (pow(2,offset+orig[pred_rank[k]]) - 1)/log2(2+k-j); } accu_nDCG+=Y/Z; + rankmerge(j,i,C,orig_rank,pred,orig); + for (int k = j+1;k<=i;++k) + AP += ((double)C[k])/(k-j); + AP=AP*2/(i-j)-1; + accu_AP+=AP; j = i+1; ++cnt; } ++i; } - LOG(INFO)<<"Average nDGC over "<< cnt<< " queries: "<< accu_nDCG/cnt; + LOG(INFO)<<"over "<< cnt<< " queries. "<<"Average nDGC: "<< accu_nDCG/cnt<< " Average AP: "<