//
// Created by joe on 5/13/15.
//

#include "fileDataProvider.h"

#include <ctime>
#include <random>
#include <set>

using namespace std;

// Mersenne Twister engine shared by this file. It is default-constructed here;
// scrambler() draws its random indices from it and RidFileDP::take() reseeds
// it from the wall clock before each split.
mt19937 gen;

// Reads every record from `fin` into `out`.
//
// The stream is parsed as a feature count followed by records of the form
//     rank qid f_0 ... f_{fsize-1}
// Reading stops at the first record whose rank is 0, at end of input, or at
// the first token that cannot be parsed. A record that is cut short is
// discarded instead of being stored half-filled.
//
// out: list to fill; it is cleared first and its feature size is set to the
//      count read from the stream (0 when the count is missing or negative).
// Returns 0 in all cases and sets `eof` to true.
int FileDP::getDataSet(DataList &out){
    out.clear();

    int fsize = 0;
    fin>>fsize;
    // A missing or negative count means no record can be parsed; fall back to
    // an empty data set instead of resizing features with a garbage value.
    const bool headerOk = !fin.fail() && fsize >= 0;
    if (!headerOk)
        fsize = 0;
    LOG(INFO)<<"Feature size:"<<fsize;
    out.setfSize(fsize);

    // Loop on the result of each extraction, not on eof(): a stream with
    // failbit set but no eofbit would otherwise never terminate.
    while (headerOk) {
        DataEntry* e = new DataEntry;
        if (!(fin>>e->rank) || e->rank == 0 || !(fin>>e->qid)) {
            delete e;
            break;
        }
        e->feature.resize(fsize);
        bool complete = true;
        for (int i=0;i<fsize;++i) {
            if (!(fin>>e->feature(i))) {
                complete = false;
                break;
            }
        }
        if (!complete) {
            // Truncated record: drop it and stop reading.
            delete e;
            break;
        }
        out.addEntry(e);
    }
    eof=true;
    return 0;
}

void RidFileDP::readEntries() {
    DataEntry *e;
    int fsize;
    d.clear();
    fin >> fsize;
    LOG(INFO) << "Feature size:" << fsize;
    d.setfSize(fsize);
    while (!fin.eof()) {
        e = new DataEntry;
        fin >> e->qid;
        if (e->qid == "0") {
            delete e;
            break;
        }
        e->feature.resize(fsize);
        e->rank=-1;
        for (int i = 0; i < fsize; ++i) {
            fin >> e->feature(i);
        }
        d.addEntry(e);
    }
    pos = 0;
    std::vector<DataEntry*> & dat = d.getData();
    while (pos<dat.size() && dat[pos]->rank!=-1 && dat[pos]->qid!="-1")
        ++pos;
    qid = 1;
    read = true;
}

// Builds the comparison set for the current probe entry and advances to the
// next probe.
//
// For every stored entry other than the probe, `out` receives a new entry
// whose features are the element-wise absolute difference to the probe, whose
// qid is the probe's qid, and whose rank is 1 when the two qids match and -1
// otherwise. Stored entries that match the probe are marked as used by
// giving them a rank other than -1, so they are not picked as probes later.
//
// out: list to fill; cleared first, feature size copied from the stored data.
// Returns 0 in all cases. Sets `eof` once no further probe is available; when
// called with no probe left, `out` stays empty.
int RidFileDP::getDataSet(DataList &out){
    if (!read)
        readEntries();
    out.clear();
    const int fsize = d.getfSize();
    out.setfSize(fsize);
    std::vector<DataEntry*> & dat = d.getData();

    // Guard against empty data and against calls made after the last probe;
    // indexing dat[pos] would otherwise read past the end.
    if (!(pos<dat.size())) {
        eof = true;
        return 0;
    }

    DataEntry *probe = dat[pos];
    for (int i=0;i<d.getSize();++i) {
        if (i==pos)
            continue;
        DataEntry *e = new DataEntry;
        if (dat[i]->qid == probe->qid) {
            e->rank=1;
            // Same identity as the probe: mark it as consumed.
            dat[i]->rank=qid;
        } else {
            e->rank=-1;
        }
        e->feature.resize(fsize);
        e->qid=probe->qid;
        for (int j = 0; j < fsize; ++j) {
            e->feature(j) = fabs(dat[i]->feature(j) - probe->feature(j));
        }
        out.addEntry(e);
    }

    ++qid;
    probe->rank=qid;
    // Skip entries that were already consumed or carry the qid "-1".
    while (pos<dat.size() && (dat[pos]->rank!=-1 || dat[pos]->qid=="-1"))
        ++pos;
    if (pos==d.getSize())
        eof = true;
    return 0;
}

int RidFileDP::getpSize() {
    std::vector<string> p;
    if (!read)
        readEntries();
    std::vector<DataEntry*> &dat = d.getData();
    for (int i=0;i<dat.size();++i)
    {
        bool ext=false;
        for (int j=0;j<p.size();++j)
            if (p[j] == dat[i]->qid )
            {
                ext=true;
                break;
            }
        if (!ext)
            p.push_back(dat[i]->qid);
    }
    return p.size();
};

// Shuffles `dat` in place. Walking from the last slot down to the first, each
// slot is swapped with an element drawn (via the file-level engine `gen`)
// from the slots at or below it.
void scrambler(vector<DataEntry*> &dat)
{
    const int total = static_cast<int>(dat.size());
    for (int last = total - 1; last >= 0; --last)
    {
        const int pick = static_cast<int>(gen() % (last + 1));
        DataEntry* const held = dat[last];
        dat[last] = dat[pick];
        dat[pick] = held;
    }
}

void RidFileDP::take(int n,vector<DataEntry*> &a,vector<DataEntry*> &b)
{
    gen.seed(time(NULL));
    DataEntry *e;
    if (!read)
        readEntries();
    vector<DataEntry*> tmp;
    tmp.reserve(d.getSize());
    a.clear();
    b.clear();
    std::vector<DataEntry*> &dat = d.getData();
    scrambler(tmp);
    for (int i=0;i<dat.size();++i)
        tmp.push_back(dat[i]);
    int pos = 0;
    string qid;
    for (int i=0;i<n;++i)
    {
        while (tmp[pos]==NULL)
            ++pos;
        qid = tmp[pos]->qid;
        a.push_back(tmp[pos]);
        tmp[pos]=NULL;
        for (int j = pos+1; j< tmp.size();++j)
            if (tmp[j]!=NULL &&tmp[j]->qid==qid)
            {
                a.push_back(tmp[j]);
                tmp[j]=NULL;
            }
    }
    for (int i=0;i<tmp.size();++i)
        if (tmp[i]!=NULL)
            b.push_back(tmp[i]);
    scrambler(a);
    scrambler(b);
}

// Hands every stored entry to `out`, loading the entries first if needed.
// `out` is cleared, receives the stored pointers in their stored order, and
// then gets the same feature size as the stored data.
void RidFileDP::getAllDataSet(RidList &out){
    if (!read) {
        readEntries();
    }
    out.clear();
    std::vector<DataEntry*> &entries = d.getData();
    for (DataEntry *entry : entries) {
        out.addEntry(entry);
    }
    out.setfSize(d.getfSize());
}