// Header guarded by DATAPROV_H. Visible declarations:
//
//   DataEntry    - one sample: a string `qid`, a double `rank` and an
//                  Eigen::VectorXd `feature`.
//
//   DataList     - a list of DataEntry pointers. addEntry() appends a pointer,
//                  getSize() returns the element count, setfSize()/getfSize()
//                  store and return the integer `n`, getData() returns the
//                  underlying vector by reference, and the destructor calls
//                  clear(). A member whose header is missing builds a new entry
//                  `dat` by copying rank, qid and every feature component from
//                  its argument `d`, and returns it.
//
//   RidList      - keeps three pointer lists: `all` (every entry passed to
//                  addEntry), `uniq` and `other`.
//                  addEntry(): an entry whose qid is "-1" goes to `other` and
//                  the function returns; an entry whose qid matches one found by
//                  the search loop gets rank = the loop index of the match and
//                  goes to `other`; anything else is appended to `uniq` with
//                  rank = its new index in `uniq`. The search loop's header is
//                  truncated - presumably it walks `uniq`; confirm.
//                  clear() only empties the three lists; no delete is visible in
//                  this class.
//                  The static flag `single` selects the pairing scheme:
//                    single  -> getuSize() = uniq.size(), getqSize() = other.size()
//                    !single -> getuSize() = all.size(),  getqSize() = all.size()-1
//                  getSize() is getuSize()*getqSize().
//                  getQid(x) computes a = x / getqSize() and returns the qid of
//                  getU(a) (single) or getAll(a) (otherwise); its local `b` is
//                  never used, and a zero getqSize() would divide by zero.
//                  getBha(x) splits x into a = x / getqSize() and
//                  b = x % getqSize(), then points `id` at uniq[a] / all[a] and
//                  `oth` at other[b] or an element of `all`. The condition that
//                  chooses between all[b] and all[b+1] is truncated - presumably
//                  `b < a`, so that an entry is not paired with itself; confirm.
//                  The same selection fragment repeats four more times, but the
//                  headers and loop bodies of those members are missing.
//                  The final member, in the non-single case, returns 1 when the
//                  ranks of the two selected entries differ by less than 1e-5
//                  and -1 otherwise; the single-mode branch compares a rank
//                  against the index `a` (start of that expression is missing).
//
//   DataProvider - abstract input interface: pure virtual getAllDataSet(),
//                  getDataSet(), open() and close(); `eof` starts out false and
//                  is reported by EOFile().
//
// NOTE(review): this copy looks as if everything between a '<' and the next
//   '>' was removed (bare `#include`, `std::vector` without a template
//   argument, loops cut off after `i`). Presumably an HTML-tag filter; restore
//   the text from an intact copy rather than reconstructing it by guesswork.
// NOTE(review): the whole header sits on two physical lines, so the `//` in
//   front of "TODO" comments out the remainder of the first line.
// NOTE(review): `n` has no initializer in the visible parts of DataList and
//   RidList; calling getfSize() before setfSize() would read an indeterminate
//   value.
// NOTE(review): `static bool single` is only declared here; its definition is
//   not part of this header.
// NOTE(review): no virtual destructor is visible on DataProvider; deleting a
//   derived provider through a DataProvider* would be undefined behaviour.
#ifndef DATAPROV_H #define DATAPROV_H #include #include "../tools/easylogging++.h" #include #include // TODO decide how to construct training data // One possible way for training data: // Matrix composed of an array of feature vectors // Labels are composed of linked list, such as // 6,3,4,0,5,0,0 // => 0->6 | 1->3 | 2->4->5 // How to compensate for non exhaustive labeling? // Use -1 to indicate not yet labeled data // -1s will be excluded from training typedef struct DataEntry{ std::string qid; double rank; Eigen::VectorXd feature; } DataEntry; class DataList{ private: int n; std::vector data; public: unsigned long getSize(){return data.size();} void addEntry(DataEntry* d){data.push_back(d);} void setfSize(int fsize){n=fsize;} int getfSize(){return n;} void clear(){ for (int i=0;irank = d->rank; dat->qid = d->qid; dat->feature.resize(d->feature.rows()); for (int i=0;ifeature.rows();++i) { dat->feature(i)=d->feature(i); } return dat; } inline std::vector& getData(){ return data; } ~DataList(){ clear(); } }; class RidList{ private: int n; std::vector uniq; std::vector other; std::vector all; public: static bool single; void clear(){ uniq.clear(); other.clear(); all.clear(); } void setfSize(int fsize){n=fsize;} inline int getfSize(){return n;} void addEntry(DataEntry* d){ int ext=false; all.push_back(d); if (d->qid=="-1") { other.push_back(d); return; } for (int i=0;iqid==d->qid) { ext = true; d->rank = i; } if (ext) other.push_back(d); else { uniq.push_back(d); d->rank=uniq.size()-1; } } inline DataEntry* getU(int x) { return uniq[x]; } inline DataEntry* getO(int x) { return other[x]; } inline DataEntry* getAll(int x) { return all[x]; } inline std::string getQid(int x) { int a,b,q=getqSize(); a=x/q; if (single) return getU(a)->qid; return getAll(a)->qid; } inline int getqSize() { if (single) return (int)other.size(); return (int)(all.size()-1); } inline int getuSize() { if (single) return (int)uniq.size(); return (int)all.size(); } inline int getSize() { return 
getuSize()*getqSize(); } inline double getBha(int x){ int a,b,q=getqSize(); a=x/q; b=x%q; double res = 0; Eigen::VectorXd *id,*oth; if (single) { id = &(uniq[a]->feature); oth = &(other[b]->feature); } else { id = &(all[a]->feature); if (bfeature); else oth = &(all[b+1]->feature); } for (int i=0;ifeature); oth = &(other[b]->feature); } else { id = &(all[a]->feature); if (bfeature); else oth = &(all[b+1]->feature); } for (int i=0;ifeature); oth = &(other[b]->feature); } else { id = &(all[a]->feature); if (bfeature); else oth = &(all[b+1]->feature); } Eigen::VectorXd res(n); for (int i=0;ifeature); oth = &(other[b]->feature); } else { id = &(all[a]->feature); if (bfeature); else oth = &(all[b+1]->feature); } for (int i=0;ifeature); oth = &(other[b]->feature); } else { id = &(all[a]->feature); if (bfeature); else oth = &(all[b+1]->feature); } for (int i=0;irank - a) < 1e-5) return 1; return -1; } double id,oth; id = all[a]->rank; if (brank; else oth = all[b+1]->rank; if (fabs(oth - id) < 1e-5) return 1; return -1; }; }; class DataProvider //Virtual base class for data input { protected: bool eof; public: DataProvider():eof(false){}; bool EOFile(){return eof;} virtual void getAllDataSet(RidList &out) = 0; virtual int getDataSet(DataList &out) = 0; virtual int open()=0; virtual int close()=0; }; #endif