summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ensemble-train.py 124
-rw-r--r-- misc.py 31
2 files changed, 154 insertions, 1 deletions
diff --git a/ensemble-train.py b/ensemble-train.py
new file mode 100644
index 0000000..1ceb380
--- /dev/null
+++ b/ensemble-train.py
@@ -0,0 +1,124 @@
+from misc import *
+import math
+inr="a.rid"  # input of the first split call and the -i file of the prediction command
+resm="res.m"  # path the final combined model is written to by putmodel()
+#values substituted for -c in the training command; each also names a "<fold>-<c>" candidate
+clist=[0.0001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 10, 100, 1000]
+
+folds=4  # number of folds the script loops over
+tot=316  # presumably the number of entries in a.rid -- TODO confirm
+step = tot // folds  # handed to split as its -c argument
+inm="0.m"  # model file given as -m in the training command
+
+# splits
+# Chain of split() calls: each round's -i input is the previous round's -b
+# file, starting from inr; round i passes a<i>.rid / b<i>.rid as -a / -b.
+print("splitting")
+ourb = inr
+for i in range(folds - 1):
+    inra, oura, ourb = ourb, "a%d.rid" % i, "b%d.rid" % i
+    params = "-c %d -i %s -a %s -b %s" % (step, inra, oura, ourb)
+    split(params)
+
+entries = ['0', '0']  # initial accumulator handed to merge()
+
+# c0.rid receives what take() reads from b0.rid; the last b-file feeds the last a-file.
+put("c0.rid", take("b0.rid"))
+put("a%d.rid" % (folds - 1), take("b%d.rid" % (folds - 2)))
+
+print("merging")
+for a in range(folds - 1):
+    # accumulate a<a>.rid into entries
+    entries = merge(take("a%d.rid" % a), entries)
+
+    if a == folds - 2:
+        # final round: the c-file is the accumulator alone
+        tmp = entries
+    else:
+        # otherwise b<a+1>.rid is merged with the accumulator first
+        tmp = merge(take("b%d.rid" % (a + 1)), entries)
+
+    rid = "c%d.rid" % (a + 1)
+    put(rid, tmp)
+
+for i in range(folds):
+    # NOTE(review): `a` is the index left over from the earlier merge loop, not
+    # this loop's `i`, and `entries` is not read again below -- confirm intent.
+    entries = take("a%d.rid" % a)
+    inra, oura, ourb = "c%d.rid" % i, "a%d.rid" % i, "b%d.rid" % i
+    params = "-c %d -i %s -a %s -b %s" % (step, inra, oura, ourb)
+    split(params)
+    entries = merge(take("a%d.rid" % a), entries)
+print("completed")
+wlist = []  # "<fold>-<c>" name of every candidate weak ranker
+mai = 0  # running maximum of |prediction| over all .p files
+# train -- both external calls are commented out, so the .m/.p files must already exist
+for i in range(folds):
+    for c in clist:
+        print("folds: %d ,c: %g" % (i, c))
+        wlist.append("%d-%g" % (i, c))
+        rid = "a%d.rid" % i
+        oum = "%d-%g.m" % (i, c)
+        oup = "%d-%g.p" % (i, c)
+        params = "-T -d -m %s -i %s -o %s -c %g" % (inm, rid, oum, c)
+        #train(params)
+        params = "-P -p -m %s -i %s -o %s" % (oum, inr, oup)
+        #bare(params)
+        for p in getpred(oup):
+            mai = max(abs(p), mai)
+
+mai = mai * 2  # doubled bound, used later to scale the weighted prediction sum
+
+# inits
+# Uniform starting weights, one per prediction entry; 1.0 keeps the division
+# floating-point even under Python 2, where 1/len(D) would truncate to 0.
+D = getpred(oup)
+D = [1.0 / len(D) for _ in D]
+# Zero accumulator with the same length as a stored model's weight vector.
+mod = [0] * len(getmodel(oum))
+
+# Boosting rounds: every round picks the unused weak ranker with the smallest
+# weighted error (total weight D of entries whose prediction is <= 0), folds
+# it into `mod` with coefficient `a`, then re-weights and renormalizes D.
+# Prediction files are not written during the loop, so each one is read once
+# here instead of once per remaining candidate in every round.
+preds = {}
+for w in wlist:
+    preds[w] = getpred(w + ".p")
+
+while wlist:
+    low = 1e20
+    k = 0
+    P = []
+    # find best weak ranker
+    for w in wlist:
+        pr = preds[w]
+        t = sum(d for (d, p) in zip(D, pr) if p <= 0)
+        if t < low:
+            low = t
+            k = w
+            P = pr
+
+    print(k)
+    wlist.remove(k)
+
+    # cal alpha: r is the D-weighted prediction sum divided by mai; while D sums
+    # to 1 and mai is twice the largest |prediction|, |r| <= 0.5 and log is finite.
+    r = sum(d * p for (d, p) in zip(D, P)) / mai
+    a = 0.5 * math.log((1 + r) / (1 - r))
+
+    # update model
+    tmod = getmodel(k + ".m")
+    for i in range(len(mod)):
+        mod[i] += a * tmod[i]
+
+    # update D, then normalize it so the weights sum to 1 again
+    for i in range(len(D)):
+        D[i] = D[i] * math.exp(-a * P[i])
+    acc = sum(D)
+    for i in range(len(D)):
+        D[i] /= acc
+
+# output model
+print(mod)
+putmodel(resm, mod)
diff --git a/misc.py b/misc.py
index 1a946ed..84ce869 100644
--- a/misc.py
+++ b/misc.py
@@ -6,8 +6,11 @@ def split(params):
call(["./split"]+params.split(" "),stdout=devnull)
def train(params):
+ params+=" --iter 1 --cg_prec 1e-4 --ls_prec 1e-10 --prec 1e-4"  # flags appended only on the train path
+ bare(params)  # the actual ./ranksvm launch lives in bare()
+
+def bare(params):  # run ./ranksvm with params exactly as given; stdout goes to os.devnull
devnull = open(os.devnull, 'w')
- params+=" --iter 1 --cg_prec 1e-4 --ls_prec 1e-10"
call(["./ranksvm"]+params.split(" "),stdout=devnull)
def cmc(params):
@@ -29,3 +32,29 @@ def put(fname,a):
for item in a:
f.write("%s\n" % item)
f.close()
+
+def getmodel(fname):
+    """Read the weight vector stored in the model file `fname`.
+
+    Line 2 of the file gives the number of weights; the weights themselves
+    start on line 4, one per line. Returns them as a list of floats.
+    """
+    with open(fname, 'r') as f:
+        res = f.read().split('\n')
+    return [float(res[3 + i]) for i in range(int(res[1]))]
+
+def putmodel(fname, m):
+    """Write the weight sequence `m` to `fname` as a model file.
+
+    Layout: "TN", len(m), "<len(m)> 1", one "%g" weight per line, final "0".
+    """
+    n = len(m)
+    lines = ["TN", "%d" % n, "%d 1" % n] + ["%g" % a for a in m] + ["0"]
+    with open(fname, 'w') as f:
+        f.write("\n".join(lines))
+
+def getpred(fname):
+    """Return the predictions in `fname` (one float per line) as a list."""
+    # Skip blank lines so a missing final newline no longer drops the last value.
+    with open(fname, 'r') as f:
+        return [float(line) for line in f if line.strip()]