diff options
author | Joe Zhao <ztuowen@gmail.com> | 2015-06-03 10:44:45 +0800 |
---|---|---|
committer | Joe Zhao <ztuowen@gmail.com> | 2015-06-03 10:44:45 +0800 |
commit | 28af3b75083382c29a56a34dc2f6e0051606e3c7 (patch) | |
tree | 69b8092e0fbe868757cffd7f865b67dae4f2b96d | |
parent | 229a3fd8fe5c83cd2230716dbd72be594c7be798 (diff) | |
download | cross-28af3b75083382c29a56a34dc2f6e0051606e3c7.tar.gz cross-28af3b75083382c29a56a34dc2f6e0051606e3c7.tar.bz2 cross-28af3b75083382c29a56a34dc2f6e0051606e3c7.zip |
ensemble
-rw-r--r-- | ensemble-train.py | 124 | ||||
-rw-r--r-- | misc.py | 31 |
2 files changed, 154 insertions, 1 deletions
diff --git a/ensemble-train.py b/ensemble-train.py new file mode 100644 index 0000000..1ceb380 --- /dev/null +++ b/ensemble-train.py @@ -0,0 +1,124 @@ +from misc import * +import math +inr="a.rid" +resm="res.m" +#consts used +clist=[0.0001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 10, 100, 1000] + +folds=4 +tot=316 +step = tot // folds +inm="0.m" + +ourb=inr +#splits +print("splitting") +for i in range(folds-1): + inra=ourb + oura="a%d.rid"%i + ourb="b%d.rid"%i + params = "-c %d -i %s -a %s -b %s" %(step,inra,oura,ourb) + split(params) + +entries=['0','0'] + +tmp=take("b0.rid") +put("c0.rid",tmp) + +tmp=take("b%d.rid"%(folds-2)) +put("a%d.rid"%(folds-1),tmp) +print("merging") +for a in range(folds-1): + tmp=take("a%d.rid"%a) + entries = merge(tmp,entries) + + if a<folds-2: + tmp=take("b%d.rid"%(a+1)) + tmp = merge(tmp,entries) + else: + tmp = entries; + + rid="c%d.rid" %(a+1) + put(rid,tmp) + +for i in range(folds): + entries = take("a%d.rid"%a) + inra="c%d.rid"%i + oura="a%d.rid"%i + ourb="b%d.rid"%i + params = "-c %d -i %s -a %s -b %s" %(step,inra,oura,ourb) + split(params) + entries = merge(take("a%d.rid"%a),entries) +print("completed") +wlist=[] +mai=0 +#train +for i in range(folds): + for c in clist: + print("folds: %d ,c: %g"%(i,c)) + wlist.append("%d-%g"%(i,c)) + oum="%d-%g.m" % (i,c) + rid = "a%d.rid"%i + params = "-T -d -m %s -i %s -o %s -c %g" % (inm,rid,oum,c) + #train(params) + oup="%d-%g.p"%(i,c) + params = "-P -p -m %s -i %s -o %s" %(oum,inr,oup) + #bare(params) + for p in getpred(oup): + mai=max(abs(p),mai); + +mai=mai*2; + +#inits +D=getpred(oup) +for i in range(len(D)): + D[i]=1/len(D); +mod=getmodel(oum); +for i in range(len(mod)): + mod[i]=0; + +while len(wlist)>0: + low=1e20 + k=0 + P=[] + #find best weak ranker + for w in wlist: + t=0 + pr=getpred(w+".p") + for (d,p) in zip(D,pr): + if p<=0: + t+=d + if t<low: + low=t + k=w + P=pr + + print(k) + wlist.remove(k) + # cal alpha + r=0; + for (d,p) in zip(D,P): + r+=d*p; + r=r/mai; + a=0.5*math.log((1+r)/(1-r)) + + #update model + tmod=getmodel(k+".m") + for i in range(len(mod)): + mod[i]+=a*tmod[i]; + + #update D + for i in range(len(D)): + D[i]=D[i]*math.exp(-a*P[i]); + + #normalize D + acc=0; + for d in D: + acc+=d; + + for i in range(len(D)): + D[i]/=acc; + +#output model +print(mod) +putmodel(resm,mod) @@ -6,8 +6,11 @@ def split(params): call(["./split"]+params.split(" "),stdout=devnull) def train(params): + params+=" --iter 1 --cg_prec 1e-4 --ls_prec 1e-10 --prec 1e-4" + bare(params) + +def bare(params): devnull = open(os.devnull, 'w') - params+=" --iter 1 --cg_prec 1e-4 --ls_prec 1e-10" call(["./ranksvm"]+params.split(" "),stdout=devnull) def cmc(params): @@ -29,3 +32,29 @@ def put(fname,a): for item in a: f.write("%s\n" % item) f.close() + +def getmodel(fname): + f=open(fname,'r') + res=f.read().split('\n') + f.close() + mod=[]; + fsize=int(res[1]); + for i in range(fsize): + mod.append(float(res[3+i])) + return mod; + +def putmodel(fname,m): + f=open(fname,'w') + f.write("TN\n") + f.write("%d\n" % len(m)) + f.write("%d 1\n"%len(m)) + for a in m: + f.write("%g\n"%a) + f.write("0") + f.close() + +def getpred(fname): + f=open(fname,'r') + res=f.read().split('\n') + f.close() + return [float(p) for p in res[:-1]] |