summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- CMakeLists.txt 2
-rw-r--r-- main.cpp 5
-rw-r--r-- tools/dataProvider.h 2
-rw-r--r-- tools/fileDataProvider.h 77
-rw-r--r-- tools/reidFDataProvider.h 8
5 files changed, 82 insertions, 12 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 18ef86f..6920572 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,5 +12,5 @@ FIND_PACKAGE( Boost COMPONENTS program_options REQUIRED )
INCLUDE_DIRECTORIES( ${Boost_INCLUDE_DIR})
set(SOURCE_FILES main.cpp ./model/ranksvm.cpp ./model/ranksvmtn.cpp ./model/rankaccu.cpp)
-add_executable(ranksvm ${SOURCE_FILES} model/rankaccu.h model/rankaccu.cpp tools/reidFDataProvider.h)
+add_executable(ranksvm ${SOURCE_FILES} model/rankaccu.h model/ranksvm.h model/ranksvmtn.h tools/dataProvider.h tools/easylogging++.h tools/matrixIO.h tools/fileDataProvider.h)
TARGET_LINK_LIBRARIES( ranksvm ${Boost_LIBRARIES} )
\ No newline at end of file
diff --git a/main.cpp b/main.cpp
index cf1dd14..434437f 100644
--- a/main.cpp
+++ b/main.cpp
@@ -110,7 +110,10 @@ int main(int argc, char **argv) {
}
else return 0;
DataProvider* dp;
- dp = new FileDP(vm["feature"].as<std::string>());
+ if (vm["feature"].as<std::string>().find(".rid") == std::string::npos)
+ dp = new FileDP(vm["feature"].as<std::string>());
+ else
+ dp = new RidFileDP(vm["feature"].as<std::string>());
mainf(*dp);
delete dp;
return 0;
diff --git a/tools/dataProvider.h b/tools/dataProvider.h
index 64bfa2d..5a2f3e9 100644
--- a/tools/dataProvider.h
+++ b/tools/dataProvider.h
@@ -16,7 +16,7 @@
// -1s will be excluded from training
typedef struct DataEntry{
- int qid;
+ std::string qid;
double rank;
Eigen::VectorXd feature;
} DataEntry;
diff --git a/tools/fileDataProvider.h b/tools/fileDataProvider.h
index 942ec9b..c4f6a4a 100644
--- a/tools/fileDataProvider.h
+++ b/tools/fileDataProvider.h
@@ -7,6 +7,8 @@
#include <iostream>
#include <fstream>
+// Rank qid features
+
class FileDP:public DataProvider
{
private:
@@ -14,7 +16,6 @@ private:
std::ifstream fin;
public:
FileDP(std::string fn=""):fname(fn){};
- void setFname(std::string fn){fname=fn;};
virtual int getDataSet(DataList &out){
DataEntry* e;
out.clear();
@@ -44,4 +45,78 @@ public:
virtual int close(){fin.close();return 0;};
};
+// label features
+
+/**
+ * Data provider for ".rid" files.
+ *
+ * The input starts with the feature size, followed by whitespace-separated
+ * records made of a label token and that many feature values. A record
+ * whose label is "0" terminates the input.
+ *
+ * The whole file is loaded on the first getDataSet() call. Each call then
+ * takes one cached record as the query and emits, for every other cached
+ * record, the element-wise absolute feature difference to the query, with
+ * rank 1 when both records carry the same label and -1 otherwise. All
+ * entries produced by one call share a freshly numbered qid.
+ */
+class RidFileDP:public DataProvider
+{
+private:
+    std::string fname;
+    std::ifstream fin;
+    DataList d;   // records loaded from the file; rank stays -1 until the record is used
+    bool read;    // true once the file has been loaded into d
+    int pos;      // index of the record that serves as query on the next call
+    int qid;      // number given to the next generated data set
+public:
+    RidFileDP(std::string fn=""):fname(fn),read(false),pos(0),qid(1){};
+
+    /**
+     * Builds the pairwise data set for the current query record.
+     *
+     * @param out receives the generated entries; it is cleared first.
+     * @return always 0. eof is set once no unused record is left (or when
+     *         nothing could be loaded).
+     */
+    virtual int getDataSet(DataList &out){
+        if (!read) {
+            int fsize = 0;
+            d.clear();
+            fin >> fsize;
+            LOG(INFO) << "Feature size:" << fsize;
+            d.setfSize(fsize);
+            std::string label;
+            // Test the extraction itself rather than eof(): a trailing blank
+            // or a failed read must not be stored as a record, and a
+            // non-EOF failure must not loop forever.
+            while (fin >> label && label != "0") {
+                DataEntry *e = new DataEntry;
+                e->qid = label;
+                e->rank = -1;
+                e->feature.resize(fsize);
+                for (int i = 0; i < fsize; ++i)
+                    fin >> e->feature(i);
+                if (!fin) {
+                    // Truncated record: drop it and stop reading.
+                    delete e;
+                    break;
+                }
+                d.addEntry(e);
+            }
+            pos = 0;
+            qid = 1;
+            // Remember that the cache is filled so later calls do not try
+            // to re-parse the exhausted stream.
+            read = true;
+        }
+        out.clear();
+        std::vector<DataEntry*> & dat = d.getData();
+        const int fsize = d.getfSize();
+        const int total = static_cast<int>(d.getSize());
+        if (pos >= total) {
+            // Nothing loaded, or every record has already been used.
+            eof = true;
+            return 0;
+        }
+        const std::string label = dat[pos]->qid;
+        const std::string group = std::to_string(qid);
+        for (int i = 0; i < total; ++i) {
+            if (i == pos)
+                continue;
+            const bool same = (dat[i]->qid == label);
+            DataEntry *e = new DataEntry;
+            e->qid = group;
+            e->rank = same ? 1 : -1;
+            e->feature.resize(fsize);
+            // Index the output by feature position j, not by record index i.
+            for (int j = 0; j < fsize; ++j)
+                e->feature(j) = fabs(dat[i]->feature(j) - dat[pos]->feature(j));
+            out.addEntry(e);
+            // Records sharing the query label are marked as used through
+            // rank; the cached labels stay untouched so a generated qid can
+            // never be mistaken for a real label on a later call.
+            // NOTE(review): this yields one data set per label, inferred
+            // from the skip loop below - confirm that is the intent.
+            if (same)
+                dat[i]->rank = 1;
+        }
+        dat[pos]->rank = 1;
+        ++qid;
+        // Advance to the next record that has not been used yet.
+        while (pos < total && dat[pos]->rank != -1)
+            ++pos;
+        if (pos == total)
+            eof = true;
+        return 0;
+    }
+
+    // Opens the file and resets the state so the next getDataSet() reloads it.
+    virtual int open(){fin.open(fname); eof=false; read=false; return 0;};
+    // Closes the file and drops the cached records.
+    virtual int close(){fin.close(); d.clear(); return 0;};
+};
+
#endif
\ No newline at end of file
diff --git a/tools/reidFDataProvider.h b/tools/reidFDataProvider.h
deleted file mode 100644
index 9fa833a..0000000
--- a/tools/reidFDataProvider.h
+++ /dev/null
@@ -1,8 +0,0 @@
-//
-// Created by joe on 4/26/15.
-//
-
-#ifndef RANKSVM_REIDFDATAPROVIDER_H
-#define RANKSVM_REIDFDATAPROVIDER_H
-
-#endif //RANKSVM_REIDFDATAPROVIDER_H