summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt2
-rw-r--r--model/rankaccu.cpp81
-rw-r--r--model/rankaccu.h4
-rw-r--r--model/ranksvm.h2
-rw-r--r--model/ranksvmtn.cpp26
-rw-r--r--model/ranksvmtn.h2
-rw-r--r--tools/dataProvider.h32
-rw-r--r--train.cpp29
8 files changed, 84 insertions, 94 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 180456c..0e356fe 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,7 +6,7 @@ INCLUDE_DIRECTORIES ( "/usr/include/eigen3" )
# Use Random Library for Random Number Generation
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fopenmp")
FIND_PACKAGE( Boost COMPONENTS program_options REQUIRED )
INCLUDE_DIRECTORIES( ${Boost_INCLUDE_DIR})
diff --git a/model/rankaccu.cpp b/model/rankaccu.cpp
index 726f2c8..f998e93 100644
--- a/model/rankaccu.cpp
+++ b/model/rankaccu.cpp
@@ -70,79 +70,64 @@ void rankmerge(int l,int r,vector<int> &C,vector<int> &rank,const vector<double>
}
}
-void rank_accu(DataList &D,const vector<double> pred)
+void rank_accu(RidList &D,const vector<double> pred)
{
unsigned long n = D.getSize();
vector<int> orig_rank(n),pred_rank(n),C(n);
vector<double> orig(n);
- vector<DataEntry*> &dat = D.getData();
int i,j;
- for (i=0;i<dat.size();++i)
+ for (i=0;i<D.getSize();++i)
{
orig_rank[i]=i;
pred_rank[i]=i;
- orig[i]=dat[i]->rank;
+ orig[i]=D.getL(i);
}
- int cnt=0;
double accu_nDCG=0;
double accu_AP=0;
- i=j=0;
- while (i<dat.size())
+ for (j=0; j<D.getSize();j+=D.getqSize())
{
- if ((i+1 == dat.size())|| dat[i]->qid!=dat[i+1]->qid)
+ i=j+D.getqSize()-1;
+ double Y=0,Z=0;
+ double AP=0;
+ ranksort(j,i,orig_rank,orig,pred);
+ ranksort(j,i,pred_rank,pred,orig);
+ for (int k = j;k<=i;++k)
{
- double Y=0,Z=0;
- double AP=0;
- ranksort(j,i,orig_rank,orig,pred);
- ranksort(j,i,pred_rank,pred,orig);
- for (int k = j;k<=i;++k)
- {
- Z += (pow(2,offset+orig[orig_rank[k]]) - 1)/log2(2+k-j);
- Y += (pow(2,offset+orig[pred_rank[k]]) - 1)/log2(2+k-j);
- }
- accu_nDCG+=Y/Z;
- rankmerge(j,i,C,orig_rank,pred,orig);
- for (int k = j+1;k<=i;++k)
- AP += ((double)C[k])/(k-j);
- AP=AP*2/(i-j)-1;
- accu_AP+=AP;
- j = i+1;
- ++cnt;
+ Z += (pow(2,offset+orig[orig_rank[k]]) - 1)/log2(2+k-j);
+ Y += (pow(2,offset+orig[pred_rank[k]]) - 1)/log2(2+k-j);
}
- ++i;
+ accu_nDCG+=Y/Z;
+ rankmerge(j,i,C,orig_rank,pred,orig);
+ for (int k = j+1;k<=i;++k)
+ AP += ((double)C[k])/(k-j);
+ AP=AP*2/(i-j)-1;
+ accu_AP+=AP;
}
- LOG(INFO)<<"over "<< cnt<< " queries. "<<"Average nDGC: "<< accu_nDCG/cnt<< " Average AP: "<<accu_AP/cnt;
+ LOG(INFO)<<"over "<< D.getuSize()<< " queries. "<<"Average nDGC: "<< accu_nDCG/D.getuSize()<< " Average AP: "<<accu_AP/D.getuSize();
}
-void rank_CMC(DataList &D,const std::vector<double> pred,CMC & cmc) {
+void rank_CMC(RidList &D,const std::vector<double> pred,CMC & cmc) {
unsigned long n = D.getSize();
vector<int> orig_rank(n),pred_rank(n);
vector<double> orig(n);
- vector<DataEntry*> &dat = D.getData();
int i,j;
- for (i=0;i<dat.size();++i)
+ for (i=0;i<D.getSize();++i)
{
orig_rank[i]=i;
pred_rank[i]=i;
- orig[i]=dat[i]->rank;
+ orig[i]=D.getL(i);
}
- int cnt=0;
- i=j=0;
- while (i<dat.size())
+ for (j=0; j<D.getSize();j+=D.getqSize())
{
- if ((i+1 == dat.size())|| dat[i]->qid!=dat[i+1]->qid)
- {
- ranksort(j,i,pred_rank,pred,orig);
- for (int k=j;k<=i;++k)
- if (orig[pred_rank[k]]>0)
- {
- LOG(INFO)<<"qid:"<<dat[pred_rank[k]]->qid<<"; pred:"<<pred[k]<<"; rank:"<< k-j;
- cmc.addEntry(k-j);
- break; // account only for the first match;
- }
- j = i+1;
- ++cnt;
- }
- ++i;
+ i=j+D.getqSize()-1;
+ ranksort(j,i,pred_rank,pred,orig);
+ for (int k=j;k<=i;++k)
+ if (orig[pred_rank[k]]>0)
+ {
+ LOG(INFO)<<"qid:"<<D.getQid(j)<<"; pred:"<<pred[k]<<"; rank:"<< k-j;
+ cmc.addEntry(k-j);
+ break; // account only for the first match;
+ }
+ j = i+1;
}
} \ No newline at end of file
diff --git a/model/rankaccu.h b/model/rankaccu.h
index bcb8906..832a9f5 100644
--- a/model/rankaccu.h
+++ b/model/rankaccu.h
@@ -36,8 +36,8 @@ public:
}
};
-void rank_CMC(DataList &D,const std::vector<double> pred,CMC & cmc);
+void rank_CMC(RidList &D,const std::vector<double> pred,CMC & cmc);
-void rank_accu(DataList &D,const std::vector<double> pred);
+void rank_accu(RidList &D,const std::vector<double> pred);
#endif //RANKSVM_RANKACCU_H
diff --git a/model/ranksvm.h b/model/ranksvm.h
index 9051343..20bb35a 100644
--- a/model/ranksvm.h
+++ b/model/ranksvm.h
@@ -26,7 +26,7 @@ protected:
int fsize;
public:
virtual int train(RidList &D)=0;
- virtual int predict(DataList &D,std::vector<double> &res)=0;
+ virtual int predict(RidList &D,std::vector<double> &res)=0;
// TODO Not sure how to construct this
// Possible solution: generate a nxn matrix each row contains the sorted list of ranker result.
int saveModel(const std::string fname);
diff --git a/model/ranksvmtn.cpp b/model/ranksvmtn.cpp
index 1414c81..f904fdd 100644
--- a/model/ranksvmtn.cpp
+++ b/model/ranksvmtn.cpp
@@ -7,7 +7,7 @@ using namespace std;
using namespace Eigen;
// Main terminating criteria
-const int maxiter = 40; // max iteration count
+const int maxiter = 50; // max iteration count
const double prec=1e-10; // precision
// conjugate gradient
const double cg_prec=1e-10; // precision
@@ -17,14 +17,21 @@ const int ls_maxiter = 10;
const double line_prec=1e-10; // precision
const double line_turb=1e-15; // purturbation
+void cal_Dw(RidList &D,const VectorXd &w, VectorXd &Dw)
+{
+ int n = D.getSize();
+ #pragma omp parallel for
+ for (int i=0;i<n;++i)
+ Dw(i) = D.getVec(i).dot(w);
+}
+
int cal_Hs(RidList &D,const vector<int> &rank,const VectorXd &corr,const VectorXd &alpha,const VectorXd s,VectorXd &Hs)
{
int n = D.getSize();
int q = D.getqSize();
Hs = VectorXd::Zero(s.rows());
VectorXd Ds(n);
- for (int i=0;i<n;++i)
- Ds(i) = D.getVec(i).dot(s);
+ cal_Dw(D,s,Ds);
VectorXd gamma(n);
for (int i=0;i<n;)
{
@@ -153,8 +160,7 @@ int line_search(const VectorXd &w,RidList &D,const VectorXd &corr,const VectorXd
{
int n=D.getSize();
VectorXd Dd(n);
- for (int i=0;i<n;++i)
- Dd(i) = D.getVec(i).dot(step);
+ cal_Dw(D,step,Dd);
VectorXd alpha,beta,yt;
VectorXd grad;
VectorXd Hs;
@@ -167,8 +173,7 @@ int line_search(const VectorXd &w,RidList &D,const VectorXd &corr,const VectorXd
while (1)
{
grad=w+t*step;
- for (int i=0;i<n;++i)
- Dd(i) = D.getVec(i).dot(grad);
+ cal_Dw(D,grad,Dd);
cal_alpha_beta(Dd,corr,D,rank,yt,alpha,beta);
VectorXd tmp = alpha.cwiseProduct(yt)-beta;
VectorXd res = 0*grad;
@@ -209,8 +214,7 @@ int train_orig(int fsize, RidList &Data,const VectorXd &corr,VectorXd &weight){
VectorXd alpha,beta;
while (true)
{
- for (int i=0;i<n;++i)
- dw(i) = Data.getVec(i).dot(weight);
+ cal_Dw(Data,weight,dw);
cal_alpha_beta(dw,corr,Data,rank,yt,alpha,beta);
// Generate support vector matrix sv & gradient
obj = (weight.dot(weight) + C*(alpha.dot(yt.cwiseProduct(yt))-beta.dot(yt)))/2;
@@ -251,9 +255,9 @@ int RSVMTN::train(RidList &D){
return 0;
};
-int RSVMTN::predict(DataList &D, vector<double> &res){
+int RSVMTN::predict(RidList &D, vector<double> &res){
res.clear();
for (int i=0;i<D.getSize();++i)
- res.push_back(((D.getData()[i])->feature).dot(model.weight));
+ res.push_back(D.getVec(i).dot(model.weight));
return 0;
}; \ No newline at end of file
diff --git a/model/ranksvmtn.h b/model/ranksvmtn.h
index c98e581..97579b3 100644
--- a/model/ranksvmtn.h
+++ b/model/ranksvmtn.h
@@ -13,7 +13,7 @@ public:
return "TN";
};
virtual int train(RidList &D);
- virtual int predict(DataList &D,std::vector<double> &res);
+ virtual int predict(RidList &D,std::vector<double> &res);
};
#endif \ No newline at end of file
diff --git a/tools/dataProvider.h b/tools/dataProvider.h
index 586965e..59b989a 100644
--- a/tools/dataProvider.h
+++ b/tools/dataProvider.h
@@ -82,9 +82,23 @@ public:
else
uniq.push_back(d);
}
+ inline DataEntry* getU(int x)
+ {
+ return uniq[x];
+ }
+ inline DataEntry* getO(int x)
+ {
+ return other[x];
+ }
+ inline std::string getQid(int x)
+ {
+ int a,b,n=getqSize();
+ a=x/n;
+ return getU(a)->qid;
+ }
inline int getqSize()
{
- return (int)(uniq.size()+other.size()-1);
+ return (int)other.size();
}
inline int getuSize()
{
@@ -99,24 +113,14 @@ public:
a=x/n;
b=x%n;
Eigen::VectorXd vec;
- if (b<a)
- vec=uniq[a]->feature-uniq[b]->feature;
- else
- if (b<uniq.size()-1)
- vec=uniq[a]->feature-uniq[b+1]->feature;
- else
- vec=uniq[a]->feature-other[b-uniq.size()+1]->feature;
- return vec.cwiseAbs();
+ return (uniq[a]->feature-other[b]->feature).cwiseAbs();
};
inline double getL(int x){
int a,b,n=getqSize();
a=x/n;
b=x%n;
- if (b<uniq.size()-1)
- return -1;
- else
- if (std::fabs(other[b-uniq.size()+1]->rank - a) < 1e-5)
- return 1;
+ if (std::fabs(other[b]->rank - a) < 1e-5)
+ return 1;
return -1;
};
};
diff --git a/train.cpp b/train.cpp
index 039c93a..0b5b4d4 100644
--- a/train.cpp
+++ b/train.cpp
@@ -43,7 +43,7 @@ int predict(DataProvider &dp) {
rsvm = RSVM::loadModel(vm["model"].as<string>().c_str());
dp.open();
- DataList D;
+ RidList D;
vector<double> L;
CMC cmc;
LOG(INFO)<<"Prediction started";
@@ -59,24 +59,21 @@ int predict(DataProvider &dp) {
else
ot=&cout;
- while (!dp.EOFile())
- {
- dp.getDataSet(D);
- LOG(INFO)<<"Read "<<D.getSize()<<" entries with "<< D.getfSize()<<" features";
- rsvm->predict(D,L);
-
- if (vm.count("validate"))
- {
- rank_accu(D,L);
- if (vm.count("cmc"))
- rank_CMC(D,L,cmc);
- }
+ dp.getAllDataSet(D);
+ LOG(INFO)<<"Read "<<D.getSize()<<" entries with "<< D.getfSize()<<" features";
+ rsvm->predict(D,L);
- if (vm.count("output") && vm.count("predict"))
- for (int i=0; i<L.size();++i)
- *ot<<L[i]<<endl;
+ if (vm.count("validate"))
+ {
+ rank_accu(D,L);
+ if (vm.count("cmc"))
+ rank_CMC(D,L,cmc);
}
+ if (vm.count("output") && vm.count("predict"))
+ for (int i=0; i<L.size();++i)
+ *ot<<L[i]<<endl;
+
LOG(INFO)<<"Finished";
if (vm.count("cmc"))
{