diff options
author | Joe Zhao <ztuowen@gmail.com> | 2015-05-13 13:35:03 +0800 |
---|---|---|
committer | Joe Zhao <ztuowen@gmail.com> | 2015-05-13 13:35:03 +0800 |
commit | 20587ac550cfcb2d7b3d6ec16e46ba1a8d0af869 (patch) | |
tree | 8da41db1cef2bcedadeb5769832d95c45ffb7f13 /tools | |
parent | 62b6b42e27a4972397e94fdbb03e74ac3f5f1244 (diff) | |
download | ranksvm-20587ac550cfcb2d7b3d6ec16e46ba1a8d0af869.tar.gz ranksvm-20587ac550cfcb2d7b3d6ec16e46ba1a8d0af869.tar.bz2 ranksvm-20587ac550cfcb2d7b3d6ec16e46ba1a8d0af869.zip |
added split
Diffstat (limited to 'tools')
-rw-r--r-- | tools/fileDataProvider.cpp | 173 | ||||
-rw-r--r-- | tools/fileDataProvider.h | 91 |
2 files changed, 179 insertions, 85 deletions
diff --git a/tools/fileDataProvider.cpp b/tools/fileDataProvider.cpp new file mode 100644 index 0000000..e9b7f3d --- /dev/null +++ b/tools/fileDataProvider.cpp @@ -0,0 +1,173 @@ +// +// Created by joe on 5/13/15. +// + +#include "fileDataProvider.h" +#include <random> +#include <ctime> + +using namespace std; + +mt19937 gen; + +int FileDP::getDataSet(DataList &out){ + DataEntry* e; + out.clear(); + int fsize; + fin>>fsize; + LOG(INFO)<<"Feature size:"<<fsize; + out.setfSize(fsize); + while (!fin.eof()) { + e = new DataEntry; + fin>>e->rank; + if (e->rank == 0) + { + delete e; + break; + } + fin>>e->qid; + e->feature.resize(fsize); + for (int i=0;i<fsize;++i) { + fin>>e->feature(i); + } + out.addEntry(e); + } + eof=true; + return 0; +} + +void RidFileDP::readEntries() { + DataEntry *e; + int fsize; + d.clear(); + fin >> fsize; + LOG(INFO) << "Feature size:" << fsize; + d.setfSize(fsize); + while (!fin.eof()) { + e = new DataEntry; + fin >> e->qid; + if (e->qid == "0") { + delete e; + break; + } + e->feature.resize(fsize); + e->rank=-1; + for (int i = 0; i < fsize; ++i) { + fin >> e->feature(i); + } + d.addEntry(e); + } + pos = 0; + qid = 1; + read = true; +} + +int RidFileDP::getDataSet(DataList &out){ + DataEntry *e; + int fsize; + if (!read) + readEntries(); + out.clear(); + fsize = d.getfSize(); + out.setfSize(fsize); + std::vector<DataEntry*> & dat = d.getData(); + for (int i=0;i<d.getSize();++i) + if (i!=pos) + { + if (dat[i]->qid == dat[pos]->qid) + { + e = new DataEntry; + e->rank=1; + dat[i]->rank=qid; + } + else + { + e = new DataEntry; + e->rank=-1; + } + e->feature.resize(d.getfSize()); + e->qid=dat[pos]->qid; + for (int j = 0; j < fsize; ++j) { + e->feature(j) = fabs(dat[i]->feature(j) -dat[pos]->feature(j)); + } + out.addEntry(e); + } + dat[pos]->qid=std::to_string(qid); + ++qid; + dat[pos]->rank=qid; + while (pos<dat.size() && dat[pos]->rank!=-1) + ++pos; + if (pos==d.getSize()) + eof = true; + return 0; +} + +int RidFileDP::getpSize() { + std::vector<string> p; + if (!read) + readEntries(); + std::vector<DataEntry*> &dat = d.getData(); + for (int i=0;i<dat.size();++i) + { + bool ext=false; + for (int j=0;j<p.size();++j) + if (p[j] == dat[i]->qid ) + { + ext=true; + break; + } + if (!ext) + p.push_back(dat[i]->qid); + } + return p.size(); +}; + +void scrambler(vector<DataEntry*> &dat) +{ + DataEntry* e; + int sz=(int)dat.size(); + for (int i=0;i<sz;++i) + { + int pos = (int)(gen()%(sz-i)); + e=dat[pos]; + dat[pos] = dat[sz-i-1]; + dat[sz-i-1] = e; + } +} + +void RidFileDP::take(int n,vector<DataEntry*> &a,vector<DataEntry*> &b) +{ + gen.seed(time(NULL)); + DataEntry *e; + if (!read) + readEntries(); + vector<DataEntry*> tmp; + tmp.reserve(d.getSize()); + a.clear(); + b.clear(); + std::vector<DataEntry*> &dat = d.getData(); + scrambler(tmp); + for (int i=0;i<dat.size();++i) + tmp.push_back(dat[i]); + int pos = 0; + string qid; + for (int i=0;i<n;++i) + { + while (tmp[pos]==NULL) + ++pos; + qid = tmp[pos]->qid; + a.push_back(tmp[pos]); + tmp[pos]=NULL; + for (int j = pos+1; j< tmp.size();++j) + if (tmp[j]!=NULL &&tmp[j]->qid==qid) + { + a.push_back(tmp[j]); + tmp[j]=NULL; + } + } + for (int i=0;i<tmp.size();++i) + if (tmp[i]!=NULL) + b.push_back(tmp[i]); + scrambler(a); + scrambler(b); +}
\ No newline at end of file diff --git a/tools/fileDataProvider.h b/tools/fileDataProvider.h index f54a38e..7bea92d 100644 --- a/tools/fileDataProvider.h +++ b/tools/fileDataProvider.h @@ -16,31 +16,7 @@ private: std::ifstream fin; public: FileDP(std::string fn=""):fname(fn){}; - virtual int getDataSet(DataList &out){ - DataEntry* e; - out.clear(); - int fsize; - fin>>fsize; - LOG(INFO)<<"Feature size:"<<fsize; - out.setfSize(fsize); - while (!fin.eof()) { - e = new DataEntry; - fin>>e->rank; - if (e->rank == 0) - { - delete e; - break; - } - fin>>e->qid; - e->feature.resize(fsize); - for (int i=0;i<fsize;++i) { - fin>>e->feature(i); - } - out.addEntry(e); - } - eof=true; - return 0; - } + virtual int getDataSet(DataList &out); virtual int open(){fin.open(fname); eof=false;return 0;}; virtual int close(){fin.close();return 0;}; }; @@ -58,68 +34,13 @@ private: int qid; public: RidFileDP(std::string fn=""):fname(fn){read=false;}; - virtual int getDataSet(DataList &out){ - DataEntry *e; - int fsize; - if (!read) { - d.clear(); - fin >> fsize; - LOG(INFO) << "Feature size:" << fsize; - d.setfSize(fsize); - while (!fin.eof()) { - e = new DataEntry; - fin >> e->qid; - if (e->qid == "0") { - delete e; - break; - } - e->feature.resize(fsize); - e->rank=-1; - for (int i = 0; i < fsize; ++i) { - fin >> e->feature(i); - } - d.addEntry(e); - } - pos = 0; - qid = 1; - read = true; - } - out.clear(); - fsize = d.getfSize(); - out.setfSize(fsize); - std::vector<DataEntry*> & dat = d.getData(); - for (int i=0;i<d.getSize();++i) - if (i!=pos) - { - if (dat[i]->qid == dat[pos]->qid) - { - e = new DataEntry; - e->rank=1; - dat[i]->rank=qid; - } - else - { - e = new DataEntry; - e->rank=-1; - } - e->feature.resize(d.getfSize()); - e->qid=dat[pos]->qid; - for (int j = 0; j < fsize; ++j) { - e->feature(j) = fabs(dat[i]->feature(j) -dat[pos]->feature(j)); - } - out.addEntry(e); - } - dat[pos]->qid=std::to_string(qid); - ++qid; - dat[pos]->rank=qid; - while (pos<dat.size() && dat[pos]->rank!=-1) - ++pos; - if (pos==d.getSize()) - eof = true; - return 0; - } + void readEntries(); + int getfSize() { if(!read) readEntries(); return d.getfSize();}; + int getpSize(); + virtual int getDataSet(DataList &out); virtual int open(){fin.open(fname); eof=false;return 0;}; virtual int close(){fin.close(); d.clear();return 0;}; + void take(int n,std::vector<DataEntry*> &a,std::vector<DataEntry*> &b); }; #endif
\ No newline at end of file |