// dataprov header: data containers and the base class for data input.
//
// Overview of what is declared below:
//   * DataEntry    - one record: a string `qid`, a double `rank` and an
//                    Eigen::VectorXd `feature`.
//   * DataList     - wraps a std::vector `data` plus an int `n` that is written
//                    by setfSize() and read back by getfSize(). addEntry()
//                    appends the DataEntry* it is given, getSize() reports
//                    data.size(), getData() returns a reference to the vector,
//                    and the destructor calls clear().
//   * DataProvider - base class for data input. Keeps an `eof` flag that starts
//                    as false and is reported by EOFile(). getAllData() clears
//                    `out`, then calls getDataSet(buf) repeatedly while
//                    EOFile() is false.
//
// NOTE(review): this copy of the file looks damaged and should be restored from
// the original before it is compiled or edited further:
//   - every span between a '<' and the following '>' appears to be missing: the
//     targets of two #include directives, the template argument of std::vector,
//     and everything from the loop condition in DataList::clear() up to the
//     member that copies rank/qid/feature from `d` into `dat` and returns `dat`;
//   - the line ends in the middle of the for-loop inside
//     DataProvider::getAllData(), so the rest of that class is not present;
//   - all line breaks were lost, so the first `//` below comments out the
//     remainder of the line.
// NOTE(review): clear() presumably releases the stored entries (the destructor
// relies on it) - its body is not visible here, confirm against the original.
#ifndef DATAPROV_H #define DATAPROV_H #include #include "../tools/easylogging++.h" #include // TODO decide how to construct training data // One possible way for training data: // Matrix composed of an array of feature vectors // Labels are composed of linked list, such as // 6,3,4,0,5,0,0 // => 0->6 | 1->3 | 2->4->5 // How to compensate for non exhaustive labeling? // Use -1 to indicate not yet labeled data // -1s will be excluded from training typedef struct DataEntry{ std::string qid; double rank; Eigen::VectorXd feature; } DataEntry; class DataList{ private: int n; std::vector data; public: unsigned long getSize(){return data.size();} void addEntry(DataEntry* d){data.push_back(d);} void setfSize(int fsize){n=fsize;} int getfSize(){return n;} void clear(){ for (int i=0;irank = d->rank; dat->qid = d->qid; dat->feature = d->feature; return dat; } inline std::vector& getData(){ return data; } ~DataList(){ clear(); } }; class DataProvider //Virtual base class for data input { protected: bool eof; public: DataProvider():eof(false){}; bool EOFile(){return eof;} void getAllData(DataList &out){\ out.clear(); DataList buf; while (!EOFile()) { getDataSet(buf); // won't work as data are discarded with every call to getDataSet // out.getData().insert(out.getData().end(),buf.getData().begin(),buf.getData().end()); for (int i=0;i