rename

author: Joe Zhao <ztuowen@gmail.com> 2015-06-04 00:39:54 +0800
committer: Joe Zhao <ztuowen@gmail.com> 2015-06-04 00:39:54 +0800
commit: dae9241190d658f44af6e3f8d7de107f03771797 (patch)
tree: 9778d53122e9580d0b82719cbf9e411d25e59c6e /ensemble.py
parent: f44d718fad470012ae634cf0fd5046b1307620d7 (diff)
download: cross-dae9241190d658f44af6e3f8d7de107f03771797.tar.gz
cross-dae9241190d658f44af6e3f8d7de107f03771797.tar.bz2
cross-dae9241190d658f44af6e3f8d7de107f03771797.zip
1 files changed, 127 insertions, 0 deletions
diff --git a/ensemble.py b/ensemble.py
new file mode 100644
index 0000000..34b4024
--- /dev/null
+++ b/ensemble.py
@@ -0,0 +1,127 @@
+from misc import *
+import math
+
+def onefold(inr,inm,tot,clist,folds,resm):
+    """Split ``inr`` into folds and combine the per-fold, per-C models.
+
+    split, take, put, merge, getpred, getmodel and putmodel are presumably
+    supplied by ``from misc import *``; their semantics are not shown here.
+    inr   -- input .rid file, e.g. "a.rid"
+    inm   -- model given as -m in the training params, e.g. "0.m"
+    tot   -- divided by folds to get split's -c value, e.g. 316
+    folds -- number of folds, e.g. 4
+    clist -- C constants to try, e.g. [0.0001, 0.005, 0.01, ..., 100, 1000]
+    resm  -- output model file handed to putmodel, e.g. "res.m"
+    """
+    step = tot // folds
+    ourb=inr
+    # Repeatedly split the remainder: a<i>.rid / b<i>.rid are the -a / -b
+    # outputs of split (chunk vs. rest is assumed from the flag usage).
+    print("splitting")
+    for i in range(folds-1):
+        inra=ourb
+        oura="a%d.rid"%i
+        ourb="b%d.rid"%i
+        params = "-c %d -i %s -a %s -b %s" %(step,inra,oura,ourb)
+        split(params)
+
+    entries=['0','0']
+
+    tmp=take("b0.rid")
+    put("c0.rid",tmp)
+
+    tmp=take("b%d.rid"%(folds-2))
+    put("a%d.rid"%(folds-1),tmp)
+    print("merging")
+    for a in range(folds-1):
+        tmp=take("a%d.rid"%a)
+        entries = merge(tmp,entries)
+
+        if a<folds-2:
+            tmp=take("b%d.rid"%(a+1))
+            tmp = merge(tmp,entries)
+        else:
+            tmp = entries
+
+        rid="c%d.rid" %(a+1)
+        put(rid,tmp)
+
+    # NOTE(review): `a` is stale here (folds-2, left over from the loop above)
+    # and the merged `entries` is never read again -- `i` may have been meant.
+    for i in range(folds):
+        entries = take("a%d.rid"%a)
+        inra="c%d.rid"%i
+        oura="a%d.rid"%i
+        ourb="b%d.rid"%i
+        params = "-c %d -i %s -a %s -b %s" %(step,inra,oura,ourb)
+        split(params)
+        entries = merge(take("a%d.rid"%a),entries)
+    print("completed")
+    wlist=[]
+    mai=0
+    # The train/predict calls are commented out, so the <fold>-<c>.m and
+    # <fold>-<c>.p files must already exist for getpred/getmodel below.
+    for i in range(folds):
+        for c in clist:
+            print("folds: %d ,c: %g"%(i,c))
+            wlist.append("%d-%g"%(i,c))
+            oum="%d-%g.m" % (i,c)
+            rid = "a%d.rid"%i
+            params = "-T -d -m %s -i %s -o %s -c %g" % (inm,rid,oum,c)
+            #train(params)
+            oup="%d-%g.p"%(i,c)
+            params = "-P -p -m %s -i %s -o %s" %(oum,inr,oup)
+            #bare(params)
+            for p in getpred(oup):
+                mai=max(abs(p),mai)
+    # Twice the largest |prediction|: keeps |r| <= 0.5 so the log is defined.
+    mai=mai*2
+    # Uniform initial weights D and a zeroed accumulator model; the last
+    # oup/oum are loaded here only to get containers of the right length.
+    D=getpred(oup)
+    for i in range(len(D)):
+        D[i]=1.0/len(D)  # 1.0 keeps this a float division on Python 2 too
+    mod=getmodel(oum)
+    for i in range(len(mod)):
+        mod[i]=0
+
+    while len(wlist)>0:
+        low=1e20
+        k=0
+        P=[]
+        # Pick the candidate with the least total weight on predictions <= 0.
+        for w in wlist:
+            t=0
+            pr=getpred(w+".p")
+            for (d,p) in zip(D,pr):
+                if p<=0:
+                    t+=d
+            if t<low:
+                low=t
+                k=w
+                P=pr
+
+        print(k)
+        wlist.remove(k)
+        # alpha is derived from the D-weighted sum of predictions over mai.
+        r=0
+        for (d,p) in zip(D,P):
+            r+=d*p
+        r=r/mai
+        a=0.5*math.log((1+r)/(1-r))
+        # Accumulate the chosen candidate's model scaled by alpha.
+        tmod=getmodel(k+".m")
+        for i in range(len(mod)):
+            mod[i]+=a*tmod[i]
+        # Reweight each entry by exp(-alpha * its prediction).
+        for i in range(len(D)):
+            D[i]=D[i]*math.exp(-a*P[i])
+        # Renormalize D so it sums to 1 again.
+        acc=0
+        for d in D:
+            acc+=d
+        for i in range(len(D)):
+            D[i]/=acc
+
+    # Write the combined model to resm.
+    putmodel(resm,mod)
author	Joe Zhao <ztuowen@gmail.com>	2015-06-04 00:39:54 +0800
committer	Joe Zhao <ztuowen@gmail.com>	2015-06-04 00:39:54 +0800
commit	dae9241190d658f44af6e3f8d7de107f03771797 (patch)
tree	9778d53122e9580d0b82719cbf9e411d25e59c6e /ensemble.py
parent	f44d718fad470012ae634cf0fd5046b1307620d7 (diff)
download	cross-dae9241190d658f44af6e3f8d7de107f03771797.tar.gz cross-dae9241190d658f44af6e3f8d7de107f03771797.tar.bz2 cross-dae9241190d658f44af6e3f8d7de107f03771797.zip