#ifndef DATAPROV_H #define DATAPROV_H #include #include "../tools/easylogging++.h" #include // TODO decide how to construct training data // One possible way for training data: // Matrix composed of an array of feature vectors // Labels are composed of linked list, such as // 6,3,4,0,5,0,0 // => 0->6 | 1->3 | 2->4->5 // How to compensate for non exhaustive labeling? // Use -1 to indicate not yet labeled data // -1s will be excluded from training typedef struct DataEntry{ std::string qid; double rank; Eigen::VectorXd feature; } DataEntry; class DataList{ private: int n; std::vector data; public: unsigned long getSize(){return data.size();} void addEntry(DataEntry* d){data.push_back(d);} void setfSize(int fsize){n=fsize;} int getfSize(){return n;} void clear(){ for (int i=0;irank = d->rank; dat->qid = d->qid; dat->feature.resize(d->feature.rows()); for (int i=0;ifeature.rows();++i) { dat->feature(i)=d->feature(i); } return dat; } inline std::vector& getData(){ return data; } ~DataList(){ clear(); } }; class RidList{ private: int n; std::vector uniq; std::vector other; public: void clear(){ uniq.clear(); other.clear(); } void setfSize(int fsize){n=fsize;} inline int getfSize(){return n;} void addEntry(DataEntry* d){ int ext=false; if (d->qid=="-1") other.push_back(d); for (int i=0;iqid==d->qid) { ext = true; d->rank = i; } if (ext) other.push_back(d); else uniq.push_back(d); } inline DataEntry* getU(int x) { return uniq[x]; } inline DataEntry* getO(int x) { return other[x]; } inline std::string getQid(int x) { int a,b,n=getqSize(); a=x/n; return getU(a)->qid; } inline int getqSize() { return (int)other.size(); } inline int getuSize() { return (int)uniq.size(); } inline int getSize() { return getuSize()*getqSize(); } inline Eigen::VectorXd getVec(int x){ int a,b,n=getqSize(); a=x/n; b=x%n; Eigen::VectorXd vec; return (uniq[a]->feature-other[b]->feature).cwiseAbs(); }; inline double getL(int x){ int a,b,n=getqSize(); a=x/n; b=x%n; if (std::fabs(other[b]->rank - a) < 1e-5) return 1; return -1; }; }; class DataProvider //Virtual base class for data input { protected: bool eof; public: DataProvider():eof(false){}; bool EOFile(){return eof;} virtual void getAllDataSet(RidList &out) = 0; virtual int getDataSet(DataList &out) = 0; virtual int open()=0; virtual int close()=0; }; #endif