From 20587ac550cfcb2d7b3d6ec16e46ba1a8d0af869 Mon Sep 17 00:00:00 2001 From: Joe Zhao Date: Wed, 13 May 2015 13:35:03 +0800 Subject: added split --- tools/fileDataProvider.cpp | 173 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) create mode 100644 tools/fileDataProvider.cpp (limited to 'tools/fileDataProvider.cpp') diff --git a/tools/fileDataProvider.cpp b/tools/fileDataProvider.cpp new file mode 100644 index 0000000..e9b7f3d --- /dev/null +++ b/tools/fileDataProvider.cpp @@ -0,0 +1,173 @@ +// +// Created by joe on 5/13/15. +// + +#include "fileDataProvider.h" +#include +#include + +using namespace std; + +mt19937 gen; + +int FileDP::getDataSet(DataList &out){ + DataEntry* e; + out.clear(); + int fsize; + fin>>fsize; + LOG(INFO)<<"Feature size:"<>e->rank; + if (e->rank == 0) + { + delete e; + break; + } + fin>>e->qid; + e->feature.resize(fsize); + for (int i=0;i>e->feature(i); + } + out.addEntry(e); + } + eof=true; + return 0; +} + +void RidFileDP::readEntries() { + DataEntry *e; + int fsize; + d.clear(); + fin >> fsize; + LOG(INFO) << "Feature size:" << fsize; + d.setfSize(fsize); + while (!fin.eof()) { + e = new DataEntry; + fin >> e->qid; + if (e->qid == "0") { + delete e; + break; + } + e->feature.resize(fsize); + e->rank=-1; + for (int i = 0; i < fsize; ++i) { + fin >> e->feature(i); + } + d.addEntry(e); + } + pos = 0; + qid = 1; + read = true; +} + +int RidFileDP::getDataSet(DataList &out){ + DataEntry *e; + int fsize; + if (!read) + readEntries(); + out.clear(); + fsize = d.getfSize(); + out.setfSize(fsize); + std::vector & dat = d.getData(); + for (int i=0;iqid == dat[pos]->qid) + { + e = new DataEntry; + e->rank=1; + dat[i]->rank=qid; + } + else + { + e = new DataEntry; + e->rank=-1; + } + e->feature.resize(d.getfSize()); + e->qid=dat[pos]->qid; + for (int j = 0; j < fsize; ++j) { + e->feature(j) = fabs(dat[i]->feature(j) -dat[pos]->feature(j)); + } + out.addEntry(e); + } + dat[pos]->qid=std::to_string(qid); + ++qid; + dat[pos]->rank=qid; + while (posrank!=-1) + ++pos; + if (pos==d.getSize()) + eof = true; + return 0; +} + +int RidFileDP::getpSize() { + std::vector p; + if (!read) + readEntries(); + std::vector &dat = d.getData(); + for (int i=0;iqid ) + { + ext=true; + break; + } + if (!ext) + p.push_back(dat[i]->qid); + } + return p.size(); +}; + +void scrambler(vector &dat) +{ + DataEntry* e; + int sz=(int)dat.size(); + for (int i=0;i &a,vector &b) +{ + gen.seed(time(NULL)); + DataEntry *e; + if (!read) + readEntries(); + vector tmp; + tmp.reserve(d.getSize()); + a.clear(); + b.clear(); + std::vector &dat = d.getData(); + scrambler(tmp); + for (int i=0;iqid; + a.push_back(tmp[pos]); + tmp[pos]=NULL; + for (int j = pos+1; j< tmp.size();++j) + if (tmp[j]!=NULL &&tmp[j]->qid==qid) + { + a.push_back(tmp[j]); + tmp[j]=NULL; + } + } + for (int i=0;i