#ifndef DATAPROV_H #define DATAPROV_H #include #include "../tools/easylogging++.h" #include // TODO decide how to construct training data // One possible way for training data: // Matrix composed of an array of feature vectors // Labels are composed of linked list, such as // 6,3,4,0,5,0,0 // => 0->6 | 1->3 | 2->4->5 // How to compensate for non exhaustive labeling? // Use -1 to indicate not yet labeled data // -1s will be excluded from training typedef struct DataEntry{ std::string qid; double rank; Eigen::VectorXd feature; } DataEntry; class DataList{ private: int n; std::vector data; public: int getSize(){return data.size();} void addEntry(DataEntry* d){data.push_back(d);} void setfSize(int fsize){n=fsize;} int getfSize(){return n;} int clear(){ for (int i=0;i& getData(){ return data; } ~DataList(){ clear(); } }; class DataProvider //Virtual base class for data input { protected: bool eof; public: DataProvider():eof(false){}; bool EOFile(){return eof;} int getAllData(DataList &out){\ out.clear(); DataList buf; while (!EOFile()) { getDataSet(buf); out.getData().insert(out.getData().end(),buf.getData().begin(),buf.getData().end()); } } virtual int getDataSet(DataList &out) = 0; virtual int open()=0; virtual int close()=0; }; #endif