summaryrefslogtreecommitdiff
path: root/split.cpp
blob: bcfac478e794854385f58b4ae3e567dccc96bb2c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
/*
 * split: helper program to shuffle or split a rid file
 *  usage: run ./split -h to see all options
 *  supported operations:
 *      shuffling
 *      splitting
 */

#include <iostream>
#include <boost/program_options.hpp>
#include "tools/dataProvider.h"
#include "tools/fileDataProvider.h"
#include <vector>
#include <fstream>

INITIALIZE_EASYLOGGINGPP

using namespace std;
namespace po = boost::program_options;

po::variables_map vm;

/*
 * outputRid: write a list of entries to a rid file.
 *
 *  a      entries to write, in the order given
 *  fsize  number of features written per entry; also written as the header line
 *  fname  path of the output file
 *
 *  File layout: a first line holding fsize, then one line per entry
 *  ("qid f0 f1 ... f<fsize-1>", space separated), then a final "0" with no
 *  trailing newline.
 *
 *  returns 0 on success, -1 if the file could not be opened or written.
 */
int outputRid(vector<DataEntry*> a,int fsize,string fname)
{
    std::ofstream fout(fname.c_str());
    if (!fout)
    {
        std::cerr << "outputRid: cannot open '" << fname << "' for writing" << std::endl;
        return -1;
    }

    fout << fsize << '\n';
    for (std::vector<DataEntry*>::size_type i = 0; i < a.size(); ++i)
    {
        fout << a[i]->qid;
        for (int j = 0; j < fsize; ++j)
            fout << " " << a[i]->feature(j);
        fout << '\n';
    }
    // Trailing "0" is part of the file layout the original code produced; keep it as is.
    fout << 0;

    // close() flushes the buffer; a failure here means the data did not reach the file.
    fout.close();
    if (!fout)
    {
        std::cerr << "outputRid: error while writing '" << fname << "'" << std::endl;
        return -1;
    }
    return 0;
}

int main(int argc, char **argv)
{
    el::Configurations defaultConf;
    defaultConf.setToDefault();
    // Values are always std::string
    defaultConf.set(el::Level::Global,el::ConfigurationType::Enabled, "false");
    // default logger uses default configurations
    el::Loggers::reconfigureLogger("default", defaultConf);
    po::options_description desc("Allowed options");
    desc.add_options()
            ("help,h", "produce help message")
            ("query,Q", "Query person count")
            ("shuffle,s","shuffle input")
            ("count,c", po::value<int>(), "take number")
            ("take,a", po::value<string>(), "set output rid file 1(taken)")
            ("left,b", po::value<string>(), "set output rid file 2(left)")
            ("input,i", po::value<string>(), "set input Rid file");

    po::store(po::parse_command_line(argc, argv, desc), vm);
    po::notify(vm);
    // Print help if necessary
    if (vm.count("help")) {
        cout << desc;
        return 0;
    }

    if (vm.count("query")){
        RidFileDP dp(vm["input"].as<string>().c_str());
        dp.open();
        cout<<dp.getpSize()<<endl;
        dp.close();
        return 0;
    }
    RidFileDP::seed();
    RidFileDP dp(vm["input"].as<string>().c_str());
    vector<DataEntry*> a;
    vector<DataEntry*> b;
    dp.open();
    if (vm.count("shuffle")) {
        dp.getRidVector(a);
        dp.shuffle(a);
        outputRid(a, dp.getfSize(), vm["take"].as<string>());
    }
    else {
        dp.take(vm["count"].as<int>(), a, b);
        outputRid(a, dp.getfSize(), vm["take"].as<string>());
        outputRid(b, dp.getfSize(), vm["left"].as<string>());
    }
    dp.close();
    return 0;
}