-rw-r--r--  model/ranksvmtn.cpp  198
1 file changed, 117 insertions(+), 81 deletions(-)
diff --git a/model/ranksvmtn.cpp b/model/ranksvmtn.cpp
index d6898d1..fec7935 100644
--- a/model/ranksvmtn.cpp
+++ b/model/ranksvmtn.cpp
@@ -9,47 +9,54 @@ using namespace Eigen;
 
 const int maxiter = 10;
 const double prec=1e-4;
 const double C=1;
+const double cg_prec=1e-10;
+const double line_prec=1e-10;
+const double line_turb=1e-12;
 
-int computeHs(const MatrixXd &D,const vector<int> &A1,const vector<int> &A2,const VectorXd &pred,const double &C,const VectorXd s,VectorXd &Hs)
+int cal_Hs(const MatrixXd &D,const vector<int> &rank,const VectorXd &corr,const VectorXd &alpha,const vector<int> &A1,const vector<int> &A2,const double &C,const VectorXd s,VectorXd &Hs)
 {
     Hs = VectorXd::Zero(s.rows());
     VectorXd Ds=D*s;
-    long n = A1.size();
-    VectorXd Xs(n);
-    for (int i=0;i<n;++i)
-        Xs(i) = Ds(A1[i]) - Ds(A2[i]);
-    VectorXd ADXs = VectorXd::Zero(D.rows());
-    for (int i=0;i<n;++i)
-        if (pred(i)>0)
-        {
-            ADXs(A1[i]) = ADXs(A1[i]) + Xs(i);
-            ADXs(A2[i]) = ADXs(A2[i]) - Xs(i);
-        }
-    Hs = s + (C*(D.transpose()*ADXs));
+    VectorXd gamma(D.rows());
+    for (int i=0;i<A1.size();++i)
+    {
+        double g=0;
+        for (int j = A1[i];j<=A2[i];++j)
+            if (corr[rank[j]]<0)
+                gamma[rank[j]]=g;
+            else
+                g+=Ds[rank[j]];
+        g=0;
+        for (int j = A2[i];j>=A1[i];--j)
+            if (corr[rank[j]]>0)
+                gamma[rank[j]]=g;
+            else
+                g+=Ds[rank[j]];
+    }
+    Hs = s + (D.transpose()*(alpha.cwiseProduct(Ds) - gamma));
     return 0;
 }
 
-int cg_solve(const MatrixXd &D,const vector<int> &A1,const vector<int> &A2,const VectorXd &pred, const VectorXd &b,const double &C, VectorXd &x)
+int cg_solve(const MatrixXd &D,const vector<int> &rank,const VectorXd &corr,const VectorXd &alph,const vector<int> &A1,const vector<int> &A2,const VectorXd &b,const double &C, VectorXd &x)
 {
     double alpha,beta,r_1,r_2;
     int step=0;
     VectorXd q;
     VectorXd Hs;
-    computeHs(D,A1,A2,pred,C,x,Hs);
+    cal_Hs(D,rank,corr,alph,A1,A2,C,x,Hs);
     VectorXd res = b - Hs;
     VectorXd p = res;
     while (1)
     {
         // Non preconditioned version
         r_1 = res.dot(res);
-        if (r_1<1e-10) // Terminate condition
+        if (r_1<cg_prec) // Terminate condition
             break;
         if (step){
             beta = r_1 / r_2;
             p = res + p * beta;
         }
-        computeHs(D,A1,A2,pred,C,p,Hs);
-        q = Hs;
+        cal_Hs(D,rank,corr,alph,A1,A2,C,p,q);
         alpha = r_1/p.dot(q);
         x=x+p*alpha;
         res=res-q*alpha;
@@ -59,66 +66,112 @@ int cg_solve(const MatrixXd &D,const vector<int> &A1,const vector<int> &A2,const
     return 0;
 }
 
-// Calculate objfunc gradient & support vectors
-int objfunc_linear(const VectorXd &w,const MatrixXd &D,const vector<int> &A1,const vector<int> &A2,const double C,VectorXd &pred,VectorXd &grad, double &obj)
+void ranksort(int l,int r,vector<int> &rank,VectorXd &ref)
 {
-    for (int i=0;i<pred.rows();++i)
-        pred(i)=pred(i)>0?pred(i):0;
-    obj = (pred.cwiseProduct(pred)*C).sum()/2 + w.dot(w)/2;
-    VectorXd pA = VectorXd::Zero(D.rows());
-    for (int i=0;i<A1.size();++i) {
-        pA(A1[i]) = pA(A1[i]) + pred(i);
-        pA(A2[i]) = pA(A2[i]) - pred(i);
+    int i=l,j=r,k;
+    double mid=ref(rank[(l+r)>>1]);
+    while (i<=j)
+    {
+        while (ref[rank[i]]<mid) ++i;
+        while (ref[rank[j]]>mid) --j;
+        if (i<=j)
+        {
+            k=rank[i];
+            rank[i]=rank[j];
+            rank[j]=k;
+            ++i;
+            --j;
+        }
+    }
+    if (j>l)
+        ranksort(l,j,rank,ref);
+    if (i<r)
+        ranksort(i,r,rank,ref);
+}
+
+int cal_alpha_beta(const VectorXd &dw,const VectorXd &corr,const vector<int> &A1,const vector<int> &A2,vector<int> &rank,VectorXd &yt,VectorXd &alpha,VectorXd &beta)
+{
+    long n = dw.rows();
+    yt = dw - corr;
+    alpha=VectorXd::Zero(n);
+    beta=VectorXd::Zero(n);
+    for (int i=0;i<n;++i) rank[i]=i;
+    for (int i=0;i<A1.size();++i)
+    {
+        ranksort(A1[i],A2[i],rank,yt);
+        double a=0,b=0;
+        for (int j=A1[i];j<=A2[i];++j)
+            if (corr[rank[j]]<0)
+            {
+                alpha[rank[j]]=a;
+                beta[rank[j]]=b;
+            }
+            else
+            {
+                a+=1;
+                b+=yt[rank[j]];
+            }
+        a=b=0;
+        for (int j=A2[i];j>=A1[i];--j)
+            if (corr[rank[j]]>0)
+            {
+                alpha[rank[j]]=a;
+                beta[rank[j]]=b;
+            }
+            else
+            {
+                a+=1;
+                b+=yt[rank[j]];
+            }
     }
-    grad = w - (pA.transpose()*D).transpose();
-    return 0;
 }
 
 // line search using newton method
-int line_search(const VectorXd &w,const MatrixXd &D,const vector<int> &A1,const vector<int> &A2,const VectorXd &step,VectorXd &pred,double &t)
+int line_search(const VectorXd &w,const MatrixXd &D,const VectorXd &corr,const vector<int> &A1,const vector<int> &A2,const VectorXd &step,double &t)
 {
     double wd=w.dot(step),dd=step.dot(step);
     VectorXd Dd = D*step;
     VectorXd Xd = VectorXd::Zero(A1.size());
+    VectorXd alpha,beta,yt;
+    VectorXd grad;
+    VectorXd Hs;
+    vector<int> rank(D.rows());
+
     for (int i=0;i<A1.size();++i)
         Xd(i) = Dd(A1[i])-Dd(A2[i]);
     double g,h;
     t = 0;
-    VectorXd pred2;
     while (1)
     {
-        pred2 = pred - t*Xd;
-        g=wd+t*dd;
-        h=dd;
-        for (int i=0;i<pred2.rows();++i)
-            if (pred2(i)>0) {
-                g -= C*pred2(i)*Xd(i);
-                h += C*Xd(i)*Xd(i);
-            }
-        g=g+1e-12;
-        h=h+1e-12;
+        grad=w+t*step;
+        Dd = D*(w + t*step);
+        cal_alpha_beta(Dd,corr,A1,A2,rank,yt,alpha,beta);
+        grad = grad + (D.transpose()*(alpha.cwiseProduct(yt)-beta));
+        g = grad.dot(step);
+        cal_Hs(D,rank,corr,alpha,A1,A2,C,step,Hs);
+        h = Hs.dot(step);
+        g=g+line_turb;
+        h = h+line_turb;
         t=t-g/h;
-        if (g*g/h<1e-10)
+        if (g*g/h<line_prec)
             break;
     }
-    pred=pred2;
     return 0;
 }
 
-int train_orig(int fsize, MatrixXd &D,vector<int> &A1,vector<int> &A2,VectorXd &weight){
+int train_orig(int fsize, MatrixXd &D,const vector<int> &A1,const vector<int> &A2,const VectorXd &corr,VectorXd &weight){
     int iter = 0;
-    long n=A1.size();
-    LOG(INFO) << "training with feature size:" << fsize << " Data size:" << n << " Relation size:" << A1.size();
+    long n=D.rows();
+    LOG(INFO) << "training with feature size:" << fsize << " Data size:" << n << " Query size:" << A1.size();
     VectorXd grad(fsize);
     VectorXd step(fsize);
-    VectorXd pred(n);
+    vector<int> rank(n);
     double obj,t;
     VectorXd dw = D*weight;
-    pred=VectorXd::Zero(n);
-    for (int i=0;i<n;++i)
-        pred(i) = 1 - dw(A1[i])+dw(A2[i]);
+    VectorXd yt;
+    VectorXd alpha,beta;
     while (true)
     {
         iter+=1;
@@ -128,21 +181,21 @@ int train_orig(int fsize, MatrixXd &D,vector<int> &A1,vector<int> &A2,VectorXd &
             break;
         }
 
+        dw = D*weight;
+
+        cal_alpha_beta(dw,corr,A1,A2,rank,yt,alpha,beta);
+
         // Generate support vector matrix sv & gradient
-        objfunc_linear(weight,D,A1,A2,C,pred,grad,obj);
+        obj = (weight.dot(weight) + (alpha.dot(yt.cwiseProduct(yt))-beta.dot(yt)))/2;//
+        grad = weight + (D.transpose()*(alpha.cwiseProduct(yt)-beta));
         step = grad*0;
         // Solve
-        cg_solve(D,A1,A2,pred,grad,C,step);
+        cg_solve(D,rank,corr,alpha,A1,A2,grad,C,step);
         // do line search
-
-        line_search(weight,D,A1,A2,step,pred,t);
+        line_search(weight,D,corr,A1,A2,step,t);
         weight=weight+step*t;
-        int sv=0;
-        for (int i=0;i<n;++i)
-            if (pred(i)>0)
-                ++sv;
         // When dec is small enough
-        LOG(INFO)<<"Iter: "<<iter<<" Obj: " <<obj<<" SV: "<< sv << " Newton decr:"<<step.dot(grad)/2 << " linesearch: "<< -t ;
+        LOG(INFO)<<"Iter: "<<iter<<" Obj: " <<obj << " Newton decr:"<<step.dot(grad)/2 << " linesearch: "<< -t ;
         if (step.dot(grad) < prec * obj)
             break;
     }
@@ -151,44 +204,27 @@ int train_orig(int fsize, MatrixXd &D,vector<int> &A1,vector<int> &A2,VectorXd &
 
 int RSVMTN::train(DataList &D){
     MatrixXd Data(D.getSize(),D.getfSize());
+    VectorXd corr(D.getSize());
     vector<int> A1,A2;
     int i,j;
     LOG(INFO)<<"Processing input";
     for (i=0;i<D.getSize();++i) {
+        corr(i)=(D.getData()[i])->rank>0?0.5:-0.5;
         for (j = 0; j < D.getfSize(); ++j)
             Data(i, j) = (D.getData()[i])->feature(j);
     }
-    int cnt=0;
     i=j=0;
     while (i<D.getSize())
     {
         if ((i+1 == D.getSize())|| D.getData()[i]->qid!=D.getData()[i+1]->qid)
         {
-            int high=j;
-            while (D.getData()[high]->rank>0)
-                ++high;
-            cnt += (high-j)*(i-high+1);
-            j = i+1;
-        }
-        ++i;
-    }
-    cnt=i=j=0;
-    while (i<D.getSize())
-    {
-        if ((i+1 == D.getSize())|| D.getData()[i]->qid!=D.getData()[i+1]->qid)
-        {
-            int v1=j,v2;
-            for (v1=j;(D.getData()[v1]->rank)>0;++v1)
-                for (v2=i;(D.getData()[v2]->rank)<0;--v2) {
-                    A1.push_back(v1);
-                    A2.push_back(v2);
-                    ++cnt;
-                }
+            A1.push_back(j);
+            A2.push_back(i);
             j = i+1;
         }
         ++i;
     }
-    train_orig(fsize,Data,A1,A2,model.weight);
+    train_orig(fsize,Data,A1,A2,corr,model.weight);
     return 0;
 };
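
Note: the core of this change is the switch from materialising every relevant/irrelevant document pair (the deleted objfunc_linear and the nested v1/v2 loops in RSVMTN::train) to the order-statistics formulation of the squared-hinge ranking objective. cal_alpha_beta sorts each query's documents by yt = dw - corr, then one forward and one backward sweep give each document the count (alpha) and yt-sum (beta) of the opposing documents it forms a violated pair with, in O(n log n) per query instead of O(n^2). A minimal self-contained sketch of that sweep for a single query, using illustrative names rather than the repository's API:

#include <algorithm>
#include <numeric>
#include <vector>

// One query: label[i] = +1 marks a relevant document, -1 an irrelevant one,
// and yt[i] = score[i] - label[i]/2 (as yt = dw - corr above). After the two
// sweeps, alpha[i] counts the opposing documents that form a violated pair
// with i, and beta[i] sums their yt values, so the pairwise squared-hinge
// loss equals sum_i (alpha[i]*yt[i]^2 - beta[i]*yt[i]) -- the quantity the
// obj line in train_orig assembles (up to the 1/2 and the |w|^2 term).
void alpha_beta_sweep(const std::vector<double>& yt,
                      const std::vector<double>& label,
                      std::vector<double>& alpha,
                      std::vector<double>& beta)
{
    const std::size_t n = yt.size();
    std::vector<int> idx(n);
    std::iota(idx.begin(), idx.end(), 0);
    std::sort(idx.begin(), idx.end(),
              [&](int a, int b) { return yt[a] < yt[b]; }); // ties arbitrary
    alpha.assign(n, 0.0);
    beta.assign(n, 0.0);
    // Ascending sweep: an irrelevant document violates a pair with every
    // relevant document whose yt is smaller.
    double a = 0, b = 0;
    for (std::size_t j = 0; j < n; ++j) {
        const int k = idx[j];
        if (label[k] < 0) { alpha[k] = a; beta[k] = b; }
        else              { a += 1;      b += yt[k]; }
    }
    // Descending sweep: the symmetric counts for the relevant documents.
    a = b = 0;
    for (std::size_t j = n; j-- > 0; ) {
        const int k = idx[j];
        if (label[k] > 0) { alpha[k] = a; beta[k] = b; }
        else              { a += 1;      b += yt[k]; }
    }
}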
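On toy data the sweep is easy to validate against the direct pairwise definition. This brute-force reference (same illustrative names, a hypothetical helper for testing only) enumerates the pairs explicitly, which is essentially the work the removed code did on every call:

// O(n^2) reference: enumerate relevant/irrelevant pairs and count only the
// violated ones (yt of the irrelevant document exceeds yt of the relevant
// one). Results should match alpha_beta_sweep up to tie-breaking.
void alpha_beta_bruteforce(const std::vector<double>& yt,
                           const std::vector<double>& label,
                           std::vector<double>& alpha,
                           std::vector<double>& beta)
{
    const std::size_t n = yt.size();
    alpha.assign(n, 0.0);
    beta.assign(n, 0.0);
    for (std::size_t i = 0; i < n; ++i)
        for (std::size_t j = 0; j < n; ++j)
            if (label[i] > 0 && label[j] < 0 && yt[j] > yt[i]) {
                alpha[i] += 1; beta[i] += yt[j];
                alpha[j] += 1; beta[j] += yt[i];
            }
}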
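cal_Hs plays the same trick for the Hessian-vector product: reusing the sort order left in rank by cal_alpha_beta, the gamma sweep applies the pair-coupling part of the Hessian to Ds in linear time per query, so the Hessian (effectively I + D^T (diag(alpha) - P) D for the violated-pair incidence P) is never formed. That is what keeps cg_solve matrix-free. For reference, a generic sketch of the matrix-free conjugate-gradient pattern cg_solve instantiates (hypothetical signature, not the repository's function):

#include <functional>
#include <Eigen/Dense>

// Conjugate gradient for H*x = b where H is available only through a
// Hessian-vector product callback Hv (the role cal_Hs plays above).
Eigen::VectorXd cg(const std::function<Eigen::VectorXd(const Eigen::VectorXd&)>& Hv,
                   const Eigen::VectorXd& b, double tol = 1e-10, int maxit = 100)
{
    Eigen::VectorXd x = Eigen::VectorXd::Zero(b.size());
    Eigen::VectorXd r = b - Hv(x);   // initial residual
    Eigen::VectorXd p = r;           // initial search direction
    double rs = r.dot(r);
    for (int it = 0; it < maxit && rs > tol; ++it) {
        const Eigen::VectorXd q = Hv(p);
        const double a = rs / p.dot(q);   // step length along p
        x += a * p;
        r -= a * q;
        const double rs_new = r.dot(r);
        p = r + (rs_new / rs) * p;        // next conjugate direction
        rs = rs_new;
    }
    return x;
}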
