summaryrefslogtreecommitdiff
path: root/tools/fileDataProvider.h
blob: 8ebda206ba0e2b38cfa7c679e07e688e8dc5d885 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#ifndef FDPROV_H
#define FDPROV_H

#include "dataProvider.h"
#include "easylogging++.h"
#include <cmath>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Record layout read by FileDP: rank, qid, then the feature values.

class FileDP:public DataProvider
{
private:
    std::string fname;
    std::ifstream fin;
public:
    FileDP(std::string fn=""):fname(fn){};
    virtual int getDataSet(DataList &out){
        DataEntry* e;
        out.clear();
        int fsize;
        fin>>fsize;
        LOG(INFO)<<"Feature size:"<<fsize;
        out.setfSize(fsize);
        while (!fin.eof()) {
            e = new DataEntry;
            fin>>e->rank;
            if (e->rank == 0)
            {
                delete e;
                break;
            }
            fin>>e->qid;
            e->feature.resize(fsize);
            for (int i=0;i<fsize;++i) {
                fin>>e->feature(i);
            }
            out.addEntry(e);
        }
        eof=true;
        return 0;
    }
    virtual int open(){fin.open(fname); eof=false;return 0;};
    virtual int close(){fin.close();return 0;};
};

// Record layout read by RidFileDP: label, then the feature values.

class RidFileDP:public DataProvider
{
private:
    std::string fname;
    std::ifstream fin;
    DataList d;
    bool read;
    int pos;
    int qid;
public:
    RidFileDP(std::string fn=""):fname(fn){read=false;};
    virtual int getDataSet(DataList &out){
        DataEntry *e;
        int fsize;
        if (!read) {
            d.clear();
            fin >> fsize;
            LOG(INFO) << "Feature size:" << fsize;
            d.setfSize(fsize);
            while (!fin.eof()) {
                e = new DataEntry;
                fin >> e->qid;
                if (e->qid == "0") {
                    delete e;
                    break;
                }
                e->feature.resize(fsize);
                e->rank=-1;
                for (int i = 0; i < fsize; ++i) {
                    fin >> e->feature(i);
                }
                d.addEntry(e);
            }
            pos = 0;
            qid = 1;
            read = true;
        }
        out.clear();
        fsize = d.getfSize();
        out.setfSize(fsize);
        std::vector<DataEntry*> & dat = d.getData();
        for (int i=0;i<d.getSize();++i)
            if (i!=pos)
            {
                if (dat[i]->qid == dat[pos]->qid)
                {
                    e = new DataEntry;
                    e->rank=1;
                    dat[i]->qid=std::to_string(qid);
                    dat[i]->rank=qid;
                }
                else
                {
                    e = new DataEntry;
                    e->rank=-1;
                }
                e->feature.resize(d.getfSize());
                e->qid=std::to_string(qid);
                for (int j = 0; j < fsize; ++j) {
                    e->feature(j) = fabs(dat[i]->feature(j) -dat[pos]->feature(j));
                }
                out.addEntry(e);
            }
        dat[pos]->qid=std::to_string(qid);
        ++qid;
        dat[pos]->rank=qid;
        while (pos<dat.size() && dat[pos]->rank!=-1)
            ++pos;
        if (pos==d.getSize())
            eof = true;
        return 0;
    }
    virtual int open(){fin.open(fname); eof=false;return 0;};
    virtual int close(){fin.close(); d.clear();return 0;};
};

#endif