-rw-r--r--  model/ranksvmtn.cpp  198
1 file changed, 117 insertions(+), 81 deletions(-)
diff --git a/model/ranksvmtn.cpp b/model/ranksvmtn.cpp
index d6898d1..fec7935 100644
--- a/model/ranksvmtn.cpp
+++ b/model/ranksvmtn.cpp
@@ -9,47 +9,54 @@ using namespace Eigen;
 
 const int maxiter = 10;
 const double prec=1e-4;
 const double C=1;
+const double cg_prec=1e-10;
+const double line_prec=1e-10;
+const double line_turb=1e-12;
 
-int computeHs(const MatrixXd &D,const vector<int> &A1,const vector<int> &A2,const VectorXd &pred,const double &C,const VectorXd s,VectorXd &Hs)
+int cal_Hs(const MatrixXd &D,const vector<int> &rank,const VectorXd &corr,const VectorXd &alpha,const vector<int> &A1,const vector<int> &A2,const double &C,const VectorXd s,VectorXd &Hs)
 {
     Hs = VectorXd::Zero(s.rows());
     VectorXd Ds=D*s;
-    long n = A1.size();
-    VectorXd Xs(n);
-    for (int i=0;i<n;++i)
-        Xs(i) = Ds(A1[i]) - Ds(A2[i]);
-    VectorXd ADXs = VectorXd::Zero(D.rows());
-    for (int i=0;i<n;++i)
-        if (pred(i)>0)
-        {
-            ADXs(A1[i]) = ADXs(A1[i]) + Xs(i);
-            ADXs(A2[i]) = ADXs(A2[i]) - Xs(i);
-        }
-    Hs = s + (C*(D.transpose()*ADXs));
+    VectorXd gamma(D.rows());
+    for (int i=0;i<A1.size();++i)
+    {
+        double g=0;
+        for (int j = A1[i];j<=A2[i];++j)
+            if (corr[rank[j]]<0)
+                gamma[rank[j]]=g;
+            else
+                g+=Ds[rank[j]];
+        g=0;
+        for (int j = A2[i];j>=A1[i];--j)
+            if (corr[rank[j]]>0)
+                gamma[rank[j]]=g;
+            else
+                g+=Ds[rank[j]];
+    }
+    Hs = s + (D.transpose()*(alpha.cwiseProduct(Ds) - gamma));
     return 0;
 }
 
-int cg_solve(const MatrixXd &D,const vector<int> &A1,const vector<int> &A2,const VectorXd &pred, const VectorXd &b,const double &C, VectorXd &x)
+int cg_solve(const MatrixXd &D,const vector<int> &rank,const VectorXd &corr,const VectorXd &alph,const vector<int> &A1,const vector<int> &A2,const VectorXd &b,const double &C, VectorXd &x)
 {
     double alpha,beta,r_1,r_2;
     int step=0;
     VectorXd q;
     VectorXd Hs;
-    computeHs(D,A1,A2,pred,C,x,Hs);
+    cal_Hs(D,rank,corr,alph,A1,A2,C,x,Hs);
     VectorXd res = b - Hs;
     VectorXd p = res;
     while (1)
     {
         // Non preconditioned version
         r_1 = res.dot(res);
-        if (r_1<1e-10) // Terminate condition
+        if (r_1<cg_prec) // Terminate condition
             break;
         if (step){
             beta = r_1 / r_2;
             p = res + p * beta;
         }
-        computeHs(D,A1,A2,pred,C,p,Hs);
-        q = Hs;
+        cal_Hs(D,rank,corr,alph,A1,A2,C,p,q);
         alpha = r_1/p.dot(q);
         x=x+p*alpha;
         res=res-q*alpha;
@@ -59,66 +66,112 @@ int cg_solve(const MatrixXd &D,const vector<int> &A1,const vector<int> &A2,const
     return 0;
 }
 
-// Calculate objfunc gradient & support vectors
-int objfunc_linear(const VectorXd &w,const MatrixXd &D,const vector<int> &A1,const vector<int> &A2,const double C,VectorXd &pred,VectorXd &grad, double &obj)
+void ranksort(int l,int r,vector<int> &rank,VectorXd &ref)
 {
-    for (int i=0;i<pred.rows();++i)
-        pred(i)=pred(i)>0?pred(i):0;
-    obj = (pred.cwiseProduct(pred)*C).sum()/2 + w.dot(w)/2;
-    VectorXd pA = VectorXd::Zero(D.rows());
-    for (int i=0;i<A1.size();++i) {
-        pA(A1[i]) = pA(A1[i]) + pred(i);
-        pA(A2[i]) = pA(A2[i]) - pred(i);
+    int i=l,j=r,k;
+    double mid=ref(rank[(l+r)>>1]);
+    while (i<=j)
+    {
+        while (ref[rank[i]]<mid) ++i;
+        while (ref[rank[j]]>mid) --j;
+        if (i<=j)
+        {
+            k=rank[i];
+            rank[i]=rank[j];
+            rank[j]=k;
+            ++i;
+            --j;
+        }
+    }
+    if (j>l)
+        ranksort(l,j,rank,ref);
+    if (i<r)
+        ranksort(i,r,rank,ref);
+}
+
+int cal_alpha_beta(const VectorXd &dw,const VectorXd &corr,const vector<int> &A1,const vector<int> &A2,vector<int> &rank,VectorXd &yt,VectorXd &alpha,VectorXd &beta)
+{
+    long n = dw.rows();
+    yt = dw - corr;
+    alpha=VectorXd::Zero(n);
+    beta=VectorXd::Zero(n);
+    for (int i=0;i<n;++i) rank[i]=i;
+    for (int i=0;i<A1.size();++i)
+    {
+        ranksort(A1[i],A2[i],rank,yt);
+        double a=0,b=0;
+        for (int j=A1[i];j<=A2[i];++j)
+            if (corr[rank[j]]<0)
+            {
+                alpha[rank[j]]=a;
+                beta[rank[j]]=b;
+            }
+            else
+            {
+                a+=1;
+                b+=yt[rank[j]];
+            }
+        a=b=0;
+        for (int j=A2[i];j>=A1[i];--j)
+            if (corr[rank[j]]>0)
+            {
+                alpha[rank[j]]=a;
+                beta[rank[j]]=b;
+            }
+            else
+            {
+                a+=1;
+                b+=yt[rank[j]];
+            }
     }
-    grad = w - (pA.transpose()*D).transpose();
-    return 0;
 }
 
 // line search using newton method
-int line_search(const VectorXd &w,const MatrixXd &D,const vector<int> &A1,const vector<int> &A2,const VectorXd &step,VectorXd &pred,double &t)
+int line_search(const VectorXd &w,const MatrixXd &D,const VectorXd &corr,const vector<int> &A1,const vector<int> &A2,const VectorXd &step,double &t)
 {
     double wd=w.dot(step),dd=step.dot(step);
     VectorXd Dd = D*step;
     VectorXd Xd = VectorXd::Zero(A1.size());
+    VectorXd alpha,beta,yt;
+    VectorXd grad;
+    VectorXd Hs;
+    vector<int> rank(D.rows());
+
     for (int i=0;i<A1.size();++i)
         Xd(i) = Dd(A1[i])-Dd(A2[i]);
     double g,h;
     t = 0;
-    VectorXd pred2;
     while (1)
     {
-        pred2 = pred - t*Xd;
-        g=wd+t*dd;
-        h=dd;
-        for (int i=0;i<pred2.rows();++i)
-            if (pred2(i)>0) {
-                g -= C*pred2(i)*Xd(i);
-                h += C*Xd(i)*Xd(i);
-            }
-        g=g+1e-12;
-        h=h+1e-12;
+        grad=w+t*step;
+        Dd = D*(w + t*step);
+        cal_alpha_beta(Dd,corr,A1,A2,rank,yt,alpha,beta);
+        grad = grad + (D.transpose()*(alpha.cwiseProduct(yt)-beta));
+        g = grad.dot(step);
+        cal_Hs(D,rank,corr,alpha,A1,A2,C,step,Hs);
+        h = Hs.dot(step);
+        g=g+line_turb;
+        h = h+line_turb;
         t=t-g/h;
-        if (g*g/h<1e-10)
+        if (g*g/h<line_prec)
             break;
     }
-    pred=pred2;
     return 0;
 }
 
-int train_orig(int fsize, MatrixXd &D,vector<int> &A1,vector<int> &A2,VectorXd &weight){
+int train_orig(int fsize, MatrixXd &D,const vector<int> &A1,const vector<int> &A2,const VectorXd &corr,VectorXd &weight){
     int iter = 0;
-    long n=A1.size();
-    LOG(INFO) << "training with feature size:" << fsize << " Data size:" << n << " Relation size:" << A1.size();
+    long n=D.rows();
+    LOG(INFO) << "training with feature size:" << fsize << " Data size:" << n << " Query size:" << A1.size();
     VectorXd grad(fsize);
     VectorXd step(fsize);
-    VectorXd pred(n);
+    vector<int> rank(n);
     double obj,t;
     VectorXd dw = D*weight;
-    pred=VectorXd::Zero(n);
-    for (int i=0;i<n;++i)
-        pred(i) = 1 - dw(A1[i])+dw(A2[i]);
+    VectorXd yt;
+    VectorXd alpha,beta;
     while (true)
     {
         iter+=1;
@@ -128,21 +181,21 @@ int train_orig(int fsize, MatrixXd &D,vector<int> &A1,vector<int> &A2,VectorXd &
             break;
         }
 
+        dw = D*weight;
+
+        cal_alpha_beta(dw,corr,A1,A2,rank,yt,alpha,beta);
+
         // Generate support vector matrix sv & gradient
-        objfunc_linear(weight,D,A1,A2,C,pred,grad,obj);
+        obj = (weight.dot(weight) + (alpha.dot(yt.cwiseProduct(yt))-beta.dot(yt)))/2;//
+        grad = weight + (D.transpose()*(alpha.cwiseProduct(yt)-beta));
         step = grad*0;
         // Solve
-        cg_solve(D,A1,A2,pred,grad,C,step);
+        cg_solve(D,rank,corr,alpha,A1,A2,grad,C,step);
         // do line search
-
-        line_search(weight,D,A1,A2,step,pred,t);
+        line_search(weight,D,corr,A1,A2,step,t);
         weight=weight+step*t;
-        int sv=0;
-        for (int i=0;i<n;++i)
-            if (pred(i)>0)
-                ++sv;
         // When dec is small enough
-        LOG(INFO)<<"Iter: "<<iter<<" Obj: " <<obj<<" SV: "<< sv << " Newton decr:"<<step.dot(grad)/2 << " linesearch: "<< -t ;
+        LOG(INFO)<<"Iter: "<<iter<<" Obj: " <<obj << " Newton decr:"<<step.dot(grad)/2 << " linesearch: "<< -t ;
         if (step.dot(grad) < prec * obj)
             break;
     }
@@ -151,44 +204,27 @@ int train_orig(int fsize, MatrixXd &D,vector<int> &A1,vector<int> &A2,VectorXd &
 
 int RSVMTN::train(DataList &D){
     MatrixXd Data(D.getSize(),D.getfSize());
+    VectorXd corr(D.getSize());
     vector<int> A1,A2;
     int i,j;
     LOG(INFO)<<"Processing input";
     for (i=0;i<D.getSize();++i) {
+        corr(i)=(D.getData()[i])->rank>0?0.5:-0.5;
         for (j = 0; j < D.getfSize(); ++j)
             Data(i, j) = (D.getData()[i])->feature(j);
     }
-    int cnt=0;
     i=j=0;
     while (i<D.getSize())
     {
         if ((i+1 == D.getSize())|| D.getData()[i]->qid!=D.getData()[i+1]->qid)
         {
-            int high=j;
-            while (D.getData()[high]->rank>0)
-                ++high;
-            cnt += (high-j)*(i-high+1);
-            j = i+1;
-        }
-        ++i;
-    }
-    cnt=i=j=0;
-    while (i<D.getSize())
-    {
-        if ((i+1 == D.getSize())|| D.getData()[i]->qid!=D.getData()[i+1]->qid)
-        {
-            int v1=j,v2;
-            for (v1=j;(D.getData()[v1]->rank)>0;++v1)
-                for (v2=i;(D.getData()[v2]->rank)<0;--v2) {
-                    A1.push_back(v1);
-                    A2.push_back(v2);
-                    ++cnt;
-                }
+            A1.push_back(j);
+            A2.push_back(i);
             j = i+1;
         }
         ++i;
     }
-    train_orig(fsize,Data,A1,A2,model.weight);
+    train_orig(fsize,Data,A1,A2,corr,model.weight);
     return 0;
 };
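
Note: the core of this change is the switch from materialising every relevant/irrelevant document pair (the deleted objfunc_linear and the nested v1/v2 loops in RSVMTN::train) to the order-statistics formulation of the squared-hinge ranking objective. cal_alpha_beta sorts each query's documents by yt = dw - corr, then one forward and one backward sweep give each document the count (alpha) and yt-sum (beta) of the opposing documents it forms a violated pair with, in O(n log n) per query instead of O(n^2). A minimal self-contained sketch of that sweep for a single query, using illustrative names rather than the repository's API:

#include <algorithm>
#include <numeric>
#include <vector>

// One query: label[i] = +1 marks a relevant document, -1 an irrelevant one,
// and yt[i] = score[i] - label[i]/2 (as yt = dw - corr above). After the two
// sweeps, alpha[i] counts the opposing documents that form a violated pair
// with i, and beta[i] sums their yt values, so the pairwise squared-hinge
// loss equals sum_i (alpha[i]*yt[i]^2 - beta[i]*yt[i]) -- the quantity the
// obj line in train_orig assembles (up to the 1/2 and the |w|^2 term).
void alpha_beta_sweep(const std::vector<double>& yt,
                      const std::vector<double>& label,
                      std::vector<double>& alpha,
                      std::vector<double>& beta)
{
    const std::size_t n = yt.size();
    std::vector<int> idx(n);
    std::iota(idx.begin(), idx.end(), 0);
    std::sort(idx.begin(), idx.end(),
              [&](int a, int b) { return yt[a] < yt[b]; }); // ties arbitrary
    alpha.assign(n, 0.0);
    beta.assign(n, 0.0);
    // Ascending sweep: an irrelevant document violates a pair with every
    // relevant document whose yt is smaller.
    double a = 0, b = 0;
    for (std::size_t j = 0; j < n; ++j) {
        const int k = idx[j];
        if (label[k] < 0) { alpha[k] = a; beta[k] = b; }
        else              { a += 1;      b += yt[k]; }
    }
    // Descending sweep: the symmetric counts for the relevant documents.
    a = b = 0;
    for (std::size_t j = n; j-- > 0; ) {
        const int k = idx[j];
        if (label[k] > 0) { alpha[k] = a; beta[k] = b; }
        else              { a += 1;      b += yt[k]; }
    }
}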
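On toy data the sweep is easy to validate against the direct pairwise definition. This brute-force reference (same illustrative names, a hypothetical helper for testing only) enumerates the pairs explicitly, which is essentially the work the removed code did on every call:

// O(n^2) reference: enumerate relevant/irrelevant pairs and count only the
// violated ones (yt of the irrelevant document exceeds yt of the relevant
// one). Results should match alpha_beta_sweep up to tie-breaking.
void alpha_beta_bruteforce(const std::vector<double>& yt,
                           const std::vector<double>& label,
                           std::vector<double>& alpha,
                           std::vector<double>& beta)
{
    const std::size_t n = yt.size();
    alpha.assign(n, 0.0);
    beta.assign(n, 0.0);
    for (std::size_t i = 0; i < n; ++i)
        for (std::size_t j = 0; j < n; ++j)
            if (label[i] > 0 && label[j] < 0 && yt[j] > yt[i]) {
                alpha[i] += 1; beta[i] += yt[j];
                alpha[j] += 1; beta[j] += yt[i];
            }
}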
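cal_Hs plays the same trick for the Hessian-vector product: reusing the sort order left in rank by cal_alpha_beta, the gamma sweep applies the pair-coupling part of the Hessian to Ds in linear time per query, so the Hessian (effectively I + D^T (diag(alpha) - P) D for the violated-pair incidence P) is never formed. That is what keeps cg_solve matrix-free. For reference, a generic sketch of the matrix-free conjugate-gradient pattern cg_solve instantiates (hypothetical signature, not the repository's function):

#include <functional>
#include <Eigen/Dense>

// Conjugate gradient for H*x = b where H is available only through a
// Hessian-vector product callback Hv (the role cal_Hs plays above).
Eigen::VectorXd cg(const std::function<Eigen::VectorXd(const Eigen::VectorXd&)>& Hv,
                   const Eigen::VectorXd& b, double tol = 1e-10, int maxit = 100)
{
    Eigen::VectorXd x = Eigen::VectorXd::Zero(b.size());
    Eigen::VectorXd r = b - Hv(x);   // initial residual
    Eigen::VectorXd p = r;           // initial search direction
    double rs = r.dot(r);
    for (int it = 0; it < maxit && rs > tol; ++it) {
        const Eigen::VectorXd q = Hv(p);
        const double a = rs / p.dot(q);   // step length along p
        x += a * p;
        r -= a * q;
        const double rs_new = r.dot(r);
        p = r + (rs_new / rs) * p;        // next conjugate direction
        rs = rs_new;
    }
    return x;
}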
