// dataProvider header (include guard DATAPROV_H): data containers and the
// abstract input interface used for reading ranking data.
//
// NOTE(review): the text of this file appears damaged. Everything that sat
// between a '<' and the following '>' seems to have been dropped, and the
// original line breaks are gone. Visible symptoms: bare `#include` directives
// with no target, `std::vector data;` with no element type, and loop headers
// such as `for (int i=0;irank = d->rank;`. Several function signatures and
// bodies are therefore missing. Restore the file from version control before
// building; do not try to repair it by hand from this text.
//
// What the surviving text shows:
//
//  * DataEntry  - plain record with `qid` (std::string), `rank` (double) and
//                 `feature` (Eigen::VectorXd).
//
//  * DataList   - holds a vector named `data` plus a feature count `n`
//                 (setfSize/getfSize). addEntry() appends the given pointer,
//                 getData() returns the vector by reference, and the
//                 destructor calls clear(). The body of clear() is truncated
//                 here, so whether it deletes the stored entries cannot be
//                 confirmed from this text - TODO confirm ownership.
//                 A function that builds a new entry `dat` from `d` (copies
//                 rank, qid, and the feature vector element by element) and
//                 returns it is partly visible; its name and signature are
//                 lost.
//
//  * RidList    - keeps three vectors: `uniq`, `other`, `all`.
//                 addEntry() always appends to `all`; an entry whose qid is
//                 "-1" goes to `other` and returns immediately; otherwise the
//                 entry goes to `other` when the `ext` flag was set by the
//                 (damaged) search loop, else to `uniq` with
//                 rank = uniq.size()-1.
//                 clear() only empties the three vectors; it does not delete
//                 the entries they point to.
//                 Pair indexing: the getters taking `int x` split it as
//                 a = x / getqSize(), b = x % getqSize().
//                   single == true : getuSize() = uniq.size(),
//                                    getqSize() = other.size();
//                                    pairs are (uniq[a], other[b]).
//                   single == false: getuSize() = all.size(),
//                                    getqSize() = all.size()-1;
//                                    first element is all[a]; the second is
//                                    chosen by a damaged condition whose else
//                                    branch uses all[b+1] - presumably this
//                                    skips index a, verify against the
//                                    original file.
//                 getSize() = getuSize() * getqSize().
//                 cal(id, oth, i) returns fabs((*id)[i] - (*oth)[i]).
//                 getBha, getVec, getVecDot and addVecw all start with the
//                 same pair lookup; their loop bodies are truncated, so their
//                 arithmetic is not documented here.
//                 The last member (name lost) returns 1 when two rank values
//                 differ by less than 1e-5 and -1 otherwise.
//
//  * DataProvider - abstract base with a protected `eof` flag (initialised to
//                 false, read through EOFile()) and four pure virtual
//                 functions: getAllDataSet(RidList&), getDataSet(DataList&),
//                 open(), close().
//
// NOTE(review): points to check once the file is restored:
//   - DataProvider declares pure virtual functions but no virtual destructor
//     is visible; deleting a derived object through a DataProvider* would be
//     undefined behaviour.
//   - RidList::addEntry declares `int ext=false;` and uses it as a flag.
//   - RidList::getQid declares `b` but never uses it.
//   - `static bool single;` is only declared here; its definition must live
//     in some translation unit.
//   - `n` is never initialised in DataList or RidList until setfSize() is
//     called.
#ifndef DATAPROV_H #define DATAPROV_H #include #include "../tools/easylogging++.h" #include #include // Training data(Rid): // First line: the total number of features(fsize) // from the second line to last-1 // nametag(string) fsize number of features for nametag // possible: "-1" nametag mean don't care(partially implemented, not sure) // last line(delimiter to terminate data read): 0 typedef struct DataEntry{ std::string qid; double rank; Eigen::VectorXd feature; } DataEntry; class DataList{ private: int n; std::vector data; public: unsigned long getSize(){return data.size();} void addEntry(DataEntry* d){data.push_back(d);} void setfSize(int fsize){n=fsize;} int getfSize(){return n;} void clear(){ for (int i=0;irank = d->rank; dat->qid = d->qid; dat->feature.resize(d->feature.rows()); for (int i=0;ifeature.rows();++i) { dat->feature(i)=d->feature(i); } return dat; } inline std::vector& getData(){ return data; } ~DataList(){ clear(); } }; class RidList{ private: int n; std::vector uniq; std::vector other; std::vector all; public: static bool single; void clear(){ uniq.clear(); other.clear(); all.clear(); } void setfSize(int fsize){n=fsize;} void addEntry(DataEntry* d){ int ext=false; all.push_back(d); if (d->qid=="-1") { other.push_back(d); return; } for (int i=0;iqid==d->qid) { ext = true; d->rank = i; } if (ext) other.push_back(d); else { uniq.push_back(d); d->rank=uniq.size()-1; } } // A lot of getters inline int getfSize(){return n;} inline DataEntry* getU(int x) { return uniq[x]; } inline DataEntry* getO(int x) { return other[x]; } inline DataEntry* getAll(int x) { return all[x]; } inline std::string getQid(int x) { int a,b,q=getqSize(); a=x/q; if (single) return getU(a)->qid; return getAll(a)->qid; } inline int getqSize() { if (single) return (int)other.size(); return (int)(all.size()-1); } inline int getuSize() { if (single) return (int)uniq.size(); return (int)all.size(); } inline int getSize() { return getuSize()*getqSize(); } inline double getBha(int 
x){ int a,b,q=getqSize(); a=x/q; b=x%q; double res = 0; Eigen::VectorXd *id,*oth; if (single) { id = &(uniq[a]->feature); oth = &(other[b]->feature); } else { id = &(all[a]->feature); if (bfeature); else oth = &(all[b+1]->feature); } for (int i=0;ifeature); oth = &(other[b]->feature); } else { id = &(all[a]->feature); if (bfeature); else oth = &(all[b+1]->feature); } for (int i=0;i possible multiplexing inline double cal(Eigen::VectorXd *id,Eigen::VectorXd *oth,int i) { return fabs((*id)[i] - (*oth)[i]); } // TODO getvec as VectorXd -> deprecating due to performance issues inline Eigen::VectorXd getVec(int x){ int a,b,q=getqSize(); a=x/q; b=x%q; Eigen::VectorXd *id,*oth; if (single) { id = &(uniq[a]->feature); oth = &(other[b]->feature); } else { id = &(all[a]->feature); if (bfeature); else oth = &(all[b+1]->feature); } Eigen::VectorXd res(n); for (int i=0;i linear factor inline double getVecDot(int x,const Eigen::VectorXd &w) { int a,b,q=getqSize(); a=x/q; b=x%q; double res = 0; Eigen::VectorXd *id,*oth; if (single) { id = &(uniq[a]->feature); oth = &(other[b]->feature); } else { id = &(all[a]->feature); if (bfeature); else oth = &(all[b+1]->feature); } for (int i=0;i linear factor inline void addVecw(int x,double w,Eigen::VectorXd &X) { int a,b,q=getqSize(); a=x/q; b=x%q; Eigen::VectorXd *id,*oth; if (single) { id = &(uniq[a]->feature); oth = &(other[b]->feature); } else { id = &(all[a]->feature); if (bfeature); else oth = &(all[b+1]->feature); } for (int i=0;irank - a) < 1e-5) return 1; return -1; } double id,oth; id = all[a]->rank; if (brank; else oth = all[b+1]->rank; if (fabs(oth - id) < 1e-5) return 1; return -1; }; }; class DataProvider //Virtual base class for data input { protected: bool eof; public: DataProvider():eof(false){}; bool EOFile(){return eof;} virtual void getAllDataSet(RidList &out) = 0; virtual int getDataSet(DataList &out) = 0; virtual int open()=0; virtual int close()=0; }; #endif