summaryrefslogtreecommitdiff
path: root/ensemble.py
diff options
context:
space:
mode:
authorJoe Zhao <ztuowen@gmail.com>2015-06-04 00:39:54 +0800
committerJoe Zhao <ztuowen@gmail.com>2015-06-04 00:39:54 +0800
commitdae9241190d658f44af6e3f8d7de107f03771797 (patch)
tree9778d53122e9580d0b82719cbf9e411d25e59c6e /ensemble.py
parentf44d718fad470012ae634cf0fd5046b1307620d7 (diff)
downloadcross-dae9241190d658f44af6e3f8d7de107f03771797.tar.gz
cross-dae9241190d658f44af6e3f8d7de107f03771797.tar.bz2
cross-dae9241190d658f44af6e3f8d7de107f03771797.zip
rename
Diffstat (limited to 'ensemble.py')
-rw-r--r--ensemble.py127
1 files changed, 127 insertions, 0 deletions
diff --git a/ensemble.py b/ensemble.py
new file mode 100644
index 0000000..34b4024
--- /dev/null
+++ b/ensemble.py
@@ -0,0 +1,127 @@
+from misc import *
+import math
+
+def onefold(inr,inm,tot,clist,folds,resm)
+
+#inr="a.rid"
+#resm="res.m"
+#consts used
+#clist=[0.0001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 10, 100, 1000]
+
+#folds=4
+#tot=316
+ step = tot // folds
+#inm="0.m"
+
+ ourb=inr
+#splits
+ print("splitting")
+ for i in range(folds-1):
+ inra=ourb
+ oura="a%d.rid"%i
+ ourb="b%d.rid"%i
+ params = "-c %d -i %s -a %s -b %s" %(step,inra,oura,ourb)
+ split(params)
+
+ entries=['0','0']
+
+ tmp=take("b0.rid")
+ put("c0.rid",tmp)
+
+ tmp=take("b%d.rid"%(folds-2))
+ put("a%d.rid"%(folds-1),tmp)
+ print("merging")
+ for a in range(folds-1):
+ tmp=take("a%d.rid"%a)
+ entries = merge(tmp,entries)
+
+ if a<folds-2:
+ tmp=take("b%d.rid"%(a+1))
+ tmp = merge(tmp,entries)
+ else:
+ tmp = entries;
+
+ rid="c%d.rid" %(a+1)
+ put(rid,tmp)
+
+ for i in range(folds):
+ entries = take("a%d.rid"%a)
+ inra="c%d.rid"%i
+ oura="a%d.rid"%i
+ ourb="b%d.rid"%i
+ params = "-c %d -i %s -a %s -b %s" %(step,inra,oura,ourb)
+ split(params)
+ entries = merge(take("a%d.rid"%a),entries)
+ print("completed")
+ wlist=[]
+ mai=0
+#train
+ for i in range(folds):
+ for c in clist:
+ print("folds: %d ,c: %g"%(i,c))
+ wlist.append("%d-%g"%(i,c))
+ oum="%d-%g.m" % (i,c)
+ rid = "a%d.rid"%i
+ params = "-T -d -m %s -i %s -o %s -c %g" % (inm,rid,oum,c)
+ #train(params)
+ oup="%d-%g.p"%(i,c)
+ params = "-P -p -m %s -i %s -o %s" %(oum,inr,oup)
+ #bare(params)
+ for p in getpred(oup):
+ mai=max(abs(p),mai);
+
+ mai=mai*2;
+
+#inits
+ D=getpred(oup)
+ for i in range(len(D)):
+ D[i]=1/len(D);
+ mod=getmodel(oum);
+ for i in range(len(mod)):
+ mod[i]=0;
+
+ while len(wlist)>0:
+ low=1e20
+ k=0
+ P=[]
+ #find best weak ranker
+ for w in wlist:
+ t=0
+ pr=getpred(w+".p")
+ for (d,p) in zip(D,pr):
+ if p<=0:
+ t+=d
+ if t<low:
+ low=t
+ k=w
+ P=pr
+
+ print(k)
+ wlist.remove(k)
+ # cal alpha
+ r=0;
+ for (d,p) in zip(D,P):
+ r+=d*p;
+ r=r/mai;
+ a=0.5*math.log((1+r)/(1-r))
+
+ #update model
+ tmod=getmodel(k+".m")
+ for i in range(len(mod)):
+ mod[i]+=a*tmod[i];
+
+ #update D
+ for i in range(len(D)):
+ D[i]=D[i]*math.exp(-a*P[i]);
+
+ #normalize D
+ acc=0;
+ for d in D:
+ acc+=d;
+
+ for i in range(len(D)):
+ D[i]/=acc;
+
+#output model
+ #print(mod)
+ putmodel(resm,mod)