From 44018ad44d7d0d8196f16402bd1fa6c1c10de8ad Mon Sep 17 00:00:00 2001 From: Joe Zhao Date: Tue, 16 Jun 2015 11:34:46 +0800 Subject: fscore --- split.cpp | 2 +- tools/fileDataProvider.cpp | 17 ++++++++++++++--- tools/fileDataProvider.h | 10 +++++++++- train.cpp | 25 +++++++++++++++++++++++-- 4 files changed, 47 insertions(+), 7 deletions(-) diff --git a/split.cpp b/split.cpp index e774ea9..ec23af2 100644 --- a/split.cpp +++ b/split.cpp @@ -64,7 +64,7 @@ int main(int argc, char **argv) dp.close(); return 0; } - + RidFileDP::seed(); RidFileDP dp(vm["input"].as().c_str()); vector a; vector b; diff --git a/tools/fileDataProvider.cpp b/tools/fileDataProvider.cpp index 9be1132..2b52dc7 100644 --- a/tools/fileDataProvider.cpp +++ b/tools/fileDataProvider.cpp @@ -42,6 +42,11 @@ void RidFileDP::readEntries() { d.clear(); fin >> fsize; LOG(INFO) << "Feature size:" << fsize; + if (!maskinit) + { + for (int i=0;ifeature.resize(fsize); e->rank=-1; + double tin; for (int i = 0; i < fsize; ++i) { - fin >> e->feature(i); + fin >> tin; + e->feature(i) = tin*mask[i]; } d.addEntry(e); } @@ -124,6 +131,10 @@ int RidFileDP::getpSize() { return p.size(); }; +void RidFileDP::seed() { + gen.seed(time(NULL)); +} + void RidFileDP::shuffle(vector &dat) { DataEntry* e; @@ -131,6 +142,7 @@ void RidFileDP::shuffle(vector &dat) for (int i=0;i &dat) void RidFileDP::take(int n,vector &a,vector &b) { - gen.seed(time(NULL)); DataEntry *e; if (!read) readEntries(); @@ -148,9 +159,9 @@ void RidFileDP::take(int n,vector &a,vector &b) a.clear(); b.clear(); std::vector &dat = d.getData(); - shuffle(tmp); for (int i=0;i mask; DataList d; bool read; + bool maskinit; int pos; int qid; public: - RidFileDP(std::string fn=""):fname(fn){read=false;}; + RidFileDP(std::string fn=""):fname(fn),read(false),maskinit(false){}; void readEntries(); + void datmask(std::vector &m){ + mask.resize(m.size()); + for (int i=0;i &dat); @@ -52,6 +59,7 @@ public: for (int i=0;i &msk) +{ + ifstream fin; + int fsize; + fin.open(fname.c_str()); + fin>>fsize; + for (int i=0;i>msk[i]; + fin.close(); +} + int main(int argc, char **argv) { el::Configurations defaultConf; defaultConf.setToDefault(); @@ -133,6 +144,7 @@ int main(int argc, char **argv) { ("single,s", "one from a pair") ("pair,p","get pair result") ("fscore,f","get F-score") + ("mask,M", po::value(), "set feature mask") ("model,m", po::value(), "set input model file") ("output,o", po::value(), "set output model/prediction file") ("feature,i", po::value(), "set input feature file") @@ -177,9 +189,18 @@ int main(int argc, char **argv) { else return 0; DataProvider* dp; if (vm["feature"].as().find(".rid") == string::npos) - dp = new FileDP(vm["feature"].as()); + LOG(FATAL)<<"Format not supported"; else - dp = new RidFileDP(vm["feature"].as()); + { + RidFileDP* tmpdp = new RidFileDP(vm["feature"].as()); + if (vm.count("mask")) + { + vector msk; + getmask(vm["mask"].as(),msk); + tmpdp->datmask(msk); + } + dp = tmpdp; + } mainf(*dp); delete dp; return 0; -- cgit v1.2.3-70-g09d2