summary | refs | log | tree | commit | diff
path: root/tools/fileDataProvider.h
diff options
context:
space:
mode:
author: Joe Zhao <ztuowen@gmail.com>, 2015-05-13 13:35:03 +0800
committer: Joe Zhao <ztuowen@gmail.com>, 2015-05-13 13:35:03 +0800
commit: 20587ac550cfcb2d7b3d6ec16e46ba1a8d0af869 (patch)
tree: 8da41db1cef2bcedadeb5769832d95c45ffb7f13 /tools/fileDataProvider.h
parent: 62b6b42e27a4972397e94fdbb03e74ac3f5f1244 (diff)
download: ranksvm-20587ac550cfcb2d7b3d6ec16e46ba1a8d0af869.tar.gz
ranksvm-20587ac550cfcb2d7b3d6ec16e46ba1a8d0af869.tar.bz2
ranksvm-20587ac550cfcb2d7b3d6ec16e46ba1a8d0af869.zip
added split
Diffstat (limited to 'tools/fileDataProvider.h')
-rw-r--r-- tools/fileDataProvider.h 91
1 files changed, 6 insertions, 85 deletions
diff --git a/tools/fileDataProvider.h b/tools/fileDataProvider.h
index f54a38e..7bea92d 100644
--- a/tools/fileDataProvider.h
+++ b/tools/fileDataProvider.h
@@ -16,31 +16,7 @@ private:
std::ifstream fin;
public:
FileDP(std::string fn=""):fname(fn){};
- virtual int getDataSet(DataList &out){
- DataEntry* e;
- out.clear();
- int fsize;
- fin>>fsize;
- LOG(INFO)<<"Feature size:"<<fsize;
- out.setfSize(fsize);
- while (!fin.eof()) {
- e = new DataEntry;
- fin>>e->rank;
- if (e->rank == 0)
- {
- delete e;
- break;
- }
- fin>>e->qid;
- e->feature.resize(fsize);
- for (int i=0;i<fsize;++i) {
- fin>>e->feature(i);
- }
- out.addEntry(e);
- }
- eof=true;
- return 0;
- }
+ virtual int getDataSet(DataList &out);
virtual int open(){fin.open(fname); eof=false;return 0;};
virtual int close(){fin.close();return 0;};
};
@@ -58,68 +34,13 @@ private:
int qid;
public:
RidFileDP(std::string fn=""):fname(fn){read=false;};
- virtual int getDataSet(DataList &out){
- DataEntry *e;
- int fsize;
- if (!read) {
- d.clear();
- fin >> fsize;
- LOG(INFO) << "Feature size:" << fsize;
- d.setfSize(fsize);
- while (!fin.eof()) {
- e = new DataEntry;
- fin >> e->qid;
- if (e->qid == "0") {
- delete e;
- break;
- }
- e->feature.resize(fsize);
- e->rank=-1;
- for (int i = 0; i < fsize; ++i) {
- fin >> e->feature(i);
- }
- d.addEntry(e);
- }
- pos = 0;
- qid = 1;
- read = true;
- }
- out.clear();
- fsize = d.getfSize();
- out.setfSize(fsize);
- std::vector<DataEntry*> & dat = d.getData();
- for (int i=0;i<d.getSize();++i)
- if (i!=pos)
- {
- if (dat[i]->qid == dat[pos]->qid)
- {
- e = new DataEntry;
- e->rank=1;
- dat[i]->rank=qid;
- }
- else
- {
- e = new DataEntry;
- e->rank=-1;
- }
- e->feature.resize(d.getfSize());
- e->qid=dat[pos]->qid;
- for (int j = 0; j < fsize; ++j) {
- e->feature(j) = fabs(dat[i]->feature(j) -dat[pos]->feature(j));
- }
- out.addEntry(e);
- }
- dat[pos]->qid=std::to_string(qid);
- ++qid;
- dat[pos]->rank=qid;
- while (pos<dat.size() && dat[pos]->rank!=-1)
- ++pos;
- if (pos==d.getSize())
- eof = true;
- return 0;
- }
+ void readEntries();
+ int getfSize() { if(!read) readEntries(); return d.getfSize();};
+ int getpSize();
+ virtual int getDataSet(DataList &out);
virtual int open(){fin.open(fname); eof=false;return 0;};
virtual int close(){fin.close(); d.clear();return 0;};
+ void take(int n,std::vector<DataEntry*> &a,std::vector<DataEntry*> &b);
};
#endif
\ No newline at end of file