summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Joe Zhao <ztuowen@gmail.com> 2015-05-22 11:31:25 +0800
committer: Joe Zhao <ztuowen@gmail.com> 2015-05-22 11:31:25 +0800
commit: 01ea2597d922774ff641d6a2eacae22979d68802 (patch)
tree: e59713ad85311938f05ab5ad28c82374a3005e72
parent: 482be9136a726923eb24c9f5c0244e94c0fc91bc (diff)
download: ranksvm-01ea2597d922774ff641d6a2eacae22979d68802.tar.gz
ranksvm-01ea2597d922774ff641d6a2eacae22979d68802.tar.bz2
ranksvm-01ea2597d922774ff641d6a2eacae22979d68802.zip
added shuffle option
-rw-r--r-- split.cpp 14
-rw-r--r-- tools/fileDataProvider.cpp 8
-rw-r--r-- tools/fileDataProvider.h 9
3 files changed, 24 insertions, 7 deletions
diff --git a/split.cpp b/split.cpp
index be80545..e774ea9 100644
--- a/split.cpp
+++ b/split.cpp
@@ -43,6 +43,7 @@ int main(int argc, char **argv)
desc.add_options()
("help,h", "produce help message")
("query,Q", "Query person count")
+ ("shuffle,s","shuffle input")
("count,c", po::value<int>(), "take number")
("take,a", po::value<string>(), "set output rid file 1(taken)")
("left,b", po::value<string>(), "set output rid file 2(left)")
@@ -68,9 +69,16 @@ int main(int argc, char **argv)
vector<DataEntry*> a;
vector<DataEntry*> b;
dp.open();
- dp.take(vm["count"].as<int>(),a,b);
- outputRid(a,dp.getfSize(),vm["take"].as<string>());
- outputRid(b,dp.getfSize(),vm["left"].as<string>());
+ if (vm.count("shuffle")) {
+ dp.getRidVector(a);
+ dp.shuffle(a);
+ outputRid(a, dp.getfSize(), vm["take"].as<string>());
+ }
+ else {
+ dp.take(vm["count"].as<int>(), a, b);
+ outputRid(a, dp.getfSize(), vm["take"].as<string>());
+ outputRid(b, dp.getfSize(), vm["left"].as<string>());
+ }
dp.close();
return 0;
}
\ No newline at end of file
diff --git a/tools/fileDataProvider.cpp b/tools/fileDataProvider.cpp
index b1f3f5e..9be1132 100644
--- a/tools/fileDataProvider.cpp
+++ b/tools/fileDataProvider.cpp
@@ -124,7 +124,7 @@ int RidFileDP::getpSize() {
return p.size();
};
-void scrambler(vector<DataEntry*> &dat)
+void RidFileDP::shuffle(vector<DataEntry*> &dat)
{
DataEntry* e;
int sz=(int)dat.size();
@@ -148,7 +148,7 @@ void RidFileDP::take(int n,vector<DataEntry*> &a,vector<DataEntry*> &b)
a.clear();
b.clear();
std::vector<DataEntry*> &dat = d.getData();
- scrambler(tmp);
+ shuffle(tmp);
for (int i=0;i<dat.size();++i)
tmp.push_back(dat[i]);
int pos = 0;
@@ -170,8 +170,8 @@ void RidFileDP::take(int n,vector<DataEntry*> &a,vector<DataEntry*> &b)
for (int i=0;i<tmp.size();++i)
if (tmp[i]!=NULL)
b.push_back(tmp[i]);
- scrambler(a);
- scrambler(b);
+ shuffle(a);
+ shuffle(b);
}
void RidFileDP::getAllDataSet(RidList &out){
diff --git a/tools/fileDataProvider.h b/tools/fileDataProvider.h
index 567c8e2..972a4c5 100644
--- a/tools/fileDataProvider.h
+++ b/tools/fileDataProvider.h
@@ -38,11 +38,20 @@ public:
void readEntries();
int getfSize() { if(!read) readEntries(); return d.getfSize();};
int getpSize();
+ void shuffle(std::vector<DataEntry*> &dat);
virtual void getAllDataSet(RidList &out);
virtual int getDataSet(DataList &out);
virtual int open(){fin.open(fname); eof=false;return 0;};
virtual int close(){fin.close(); d.clear();return 0;};
void take(int n,std::vector<DataEntry*> &a,std::vector<DataEntry*> &b);
+ void getRidVector(std::vector<DataEntry*> &rid){
+ if (!read)
+ readEntries();
+ rid.clear();
+ std::vector<DataEntry*> &dat = d.getData();
+ for (int i=0;i<dat.size();++i)
+ rid.push_back(dat[i]);
+ }
};
#endif
\ No newline at end of file