2 files changed, 154 insertions, 1 deletions
diff --git a/ensemble-train.py b/ensemble-train.py
new file mode 100644
index 0000000..1ceb380
--- /dev/null
+++ b/ensemble-train.py
@@ -0,0 +1,124 @@
+from misc import *
+import math
+inr="a.rid"  # full input .rid file: split into folds below and passed as -i for predictions
+resm="res.m"  # output file for the combined model (written by putmodel at the end)
+# values substituted for -c in the training command; one weak ranker per (fold, value)
+clist=[0.0001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 10, 100, 1000]
+
+folds=4  # number of folds
+tot=316  # presumably the number of entries in a.rid — TODO confirm
+step = tot // folds  # value passed to split as -c for every fold
+inm="0.m"  # file passed as -m in the training command
+
+# Chain of splits: pass k reads the previous pass's -b output (a.rid first)
+# and writes a<k>.rid / b<k>.rid. Presumably -a receives `step` entries and
+# -b the remainder; confirm against ./split.
+print("splitting")
+remainder=inr
+for k in range(folds-1):
+    source=remainder
+    remainder="b%d.rid"%k
+    split("-c %d -i %s -a %s -b %s" %(step,source,"a%d.rid"%k,remainder))
+
+entries=['0','0']  # seed passed to merge(); NOTE(review): meaning of this two-item list is not visible here — confirm in misc.merge
+
+tmp=take("b0.rid")
+put("c0.rid",tmp)  # c0.rid is a copy of b0.rid (the -b output of the first split)
+
+tmp=take("b%d.rid"%(folds-2))
+put("a%d.rid"%(folds-1),tmp)  # -b output of the final split becomes the last fold file
+print("merging")
+for a in range(folds-1):
+    tmp=take("a%d.rid"%a)
+    entries = merge(tmp,entries)  # entries accumulates a0..a<a> across iterations
+    # c<a+1>.rid = accumulated folds, merged with b<a+1>.rid except on the last pass
+    if a<folds-2:
+        tmp=take("b%d.rid"%(a+1))
+        tmp = merge(tmp,entries)
+    else:
+        tmp = entries;
+    # presumably c<k>.rid ends up holding every fold except fold k — TODO confirm
+    rid="c%d.rid" %(a+1)
+    put(rid,tmp)
+
+for i in range(folds):  # re-split each c<i>.rid into a<i>.rid / b<i>.rid
+    entries = take("a%d.rid"%a)  # NOTE(review): `a` is left over from the merge loop (folds-2), not `i` — confirm intent
+    inra="c%d.rid"%i
+    oura="a%d.rid"%i
+    ourb="b%d.rid"%i
+    params = "-c %d -i %s -a %s -b %s" %(step,inra,oura,ourb)
+    split(params)  # presumably rewrites the a<i>.rid / b<i>.rid produced earlier — confirm against ./split
+    entries = merge(take("a%d.rid"%a),entries)  # NOTE(review): `entries` is never read after this loop
+print("completed")
+wlist=[]  # names ("<fold>-<c>") of the weak rankers available to the boosting loop
+mai=0  # largest |prediction| seen across all prediction files
+# train: one model/prediction file pair per (fold, c); the train/predict calls are commented out, so the .p files must already exist
+for i in range(folds):
+    for c in clist:
+        print("folds: %d ,c: %g"%(i,c))
+        wlist.append("%d-%g"%(i,c))
+        oum="%d-%g.m" % (i,c)  # model file for this (fold, c)
+        rid = "a%d.rid"%i  # training input for this fold
+        params = "-T -d -m %s -i %s -o %s -c %g" % (inm,rid,oum,c)
+        #train(params)
+        oup="%d-%g.p"%(i,c)  # prediction file for this (fold, c)
+        params = "-P -p -m %s -i %s -o %s" %(oum,inr,oup)  # predictions are made on the full input inr
+        #bare(params)
+        for p in getpred(oup):
+            mai=max(abs(p),mai);
+
+mai=mai*2;  # r is divided by this later; presumably doubled to keep |r| well below 1 — confirm
+
+# inits: uniform sample weights D and an all-zero accumulator model `mod`.
+# The last prediction/model files from the training loop are read only to
+# size the two lists.
+D=getpred(oup)
+# 1.0/... keeps the weights fractional under Python 2 integer division too
+D=[1.0/len(D) for _ in D]
+mod=[0 for _ in getmodel(oum)]
+
+while len(wlist)>0:  # one boosting round per remaining weak ranker
+    low=1e20
+    k=0
+    P=[]
+    # find best weak ranker: lowest D-weighted mass on predictions <= 0
+    for w in wlist:
+        t=0
+        pr=getpred(w+".p")  # prediction file is re-read every round
+        for (d,p) in zip(D,pr):
+            if p<=0:
+                t+=d
+        if t<low:  # strict <, so the earliest candidate wins ties
+            low=t
+            k=w
+            P=pr
+    
+    print(k)
+    wlist.remove(k)  # each weak ranker is used at most once
+    # compute alpha from the D-weighted sum of predictions r, scaled by mai
+    r=0;
+    for (d,p) in zip(D,P):
+        r+=d*p;
+    r=r/mai;  # NOTE(review): raises ZeroDivisionError if every prediction was 0 (mai == 0)
+    a=0.5*math.log((1+r)/(1-r))
+    
+    # update model: add the chosen ranker's weights scaled by alpha
+    tmod=getmodel(k+".m")
+    for i in range(len(mod)):
+        mod[i]+=a*tmod[i];
+    
+    # update D: shrink weights where a*P[i] > 0, grow them where it is < 0
+    for i in range(len(D)):
+        D[i]=D[i]*math.exp(-a*P[i]);
+    
+    # normalize D so the weights sum to 1 again
+    acc=0;
+    for d in D:
+        acc+=d;
+    
+    for i in range(len(D)):
+        D[i]/=acc;
+
+# output model: print the combined weights and write them to resm
+print(mod)
+putmodel(resm,mod)
diff --git a/misc.py b/misc.py
index 1a946ed..84ce869 100644
--- a/misc.py
+++ b/misc.py
@@ -6,8 +6,11 @@ def split(params):
     call(["./split"]+params.split(" "),stdout=devnull)
 
 def train(params):
+    params+=" --iter 1 --cg_prec 1e-4 --ls_prec 1e-10 --prec 1e-4"  # fixed options appended to every training call
+    bare(params)  # hand the extended argument string to ./ranksvm via bare()
+
+def bare(params):  # run ./ranksvm with `params` split on single spaces; stdout goes to os.devnull
     devnull = open(os.devnull, 'w')
-    params+=" --iter 1 --cg_prec 1e-4 --ls_prec 1e-10"
     call(["./ranksvm"]+params.split(" "),stdout=devnull)
 
 def cmc(params):
@@ -29,3 +32,29 @@ def put(fname,a):
     for item in a:
         f.write("%s\n" % item)
     f.close()
+
+def getmodel(fname):
+    """Read a model file and return its weights as a list of floats.
+
+    Line 2 holds the weight count; weights start on line 4, one per line
+    (the layout putmodel writes). The file is closed even if reading fails.
+    """
+    with open(fname,'r') as f:
+        res=f.read().split('\n')
+    return [float(res[3+i]) for i in range(int(res[1]))]
+
+def putmodel(fname,m):
+    """Write weight list `m` to `fname` in the format getmodel reads.
+
+    Layout: "TN", the weight count, "<count> 1", one %g weight per line, "0".
+    """
+    with open(fname,'w') as f:
+        f.write("TN\n%d\n%d 1\n" % (len(m),len(m)))
+        f.writelines("%g\n"%a for a in m)
+        f.write("0")
+
+def getpred(fname):
+    """Return the predictions stored in `fname` (one float per line) as a list."""
+    with open(fname,'r') as f:
+        # Skip blank lines instead of slicing off the last element, so the final value is kept even without a trailing newline.
+        return [float(p) for p in f.read().split('\n') if p.strip()]