-
Notifications
You must be signed in to change notification settings - Fork 0
/
CSVLoader.cpp
executable file
·77 lines (61 loc) · 2.08 KB
/
CSVLoader.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#include "CSVLoader.h"
using namespace std;
// Returns the cell at position `index` of the current row, converted to float.
// The text is parsed with atof, which reports no errors: a cell that does not
// start with a number converts to 0. `index` is not range-checked here, so
// callers are expected to stay below size().
float CSVRow::operator[](size_t index){
    const char* cellText = m_data[index].c_str();
    return atof(cellText);
}
// Number of cells held for the most recently read row.
size_t CSVRow::size() const{
    const size_t cellCount = m_data.size();
    return cellCount;
}
// Reads one line from `str` and replaces the stored cells with the
// comma-separated fields of that line.
//
// Splitting rules:
//   - every ',' ends a field, so "a,,b" yields "a", "" and "b";
//   - a trailing comma produces a final empty field ("a," -> "a", "");
//   - an empty line (or a failed read, which leaves the line empty) produces
//     a single empty field.
// There is no quoting or escaping support: a comma always separates fields.
void CSVRow::readNextRow(istream& str) {
    string rowText;
    getline(str, rowText);

    m_data.clear();

    string::size_type fieldStart = 0;
    for (;;) {
        const string::size_type commaPos = rowText.find(',', fieldStart);
        if (commaPos == string::npos) {
            // Last field: everything after the final comma. This is empty when
            // the line ends with a comma or has no characters at all.
            m_data.push_back(rowText.substr(fieldStart));
            break;
        }
        m_data.push_back(rowText.substr(fieldStart, commaPos - fieldStart));
        fieldStart = commaPos + 1;
    }
}
istream& operator>>(istream& str, CSVRow& data){
data.readNextRow(str);
return str;
}
// Reads every CSV row from `file` and returns all feature values flattened
// row after row into a single vector.
//
// The first column of each row (the id) is skipped; every remaining cell is
// converted to float through CSVRow::operator[].
//
// Parameters:
//   file - input stream positioned at the first row to read; consumed until a
//          read fails (normally end of file).
//   dim  - output: number of feature columns found in the FIRST row, or 0 when
//          no row could be read. Later rows are not checked against it.
//
// Returns the flattened values; empty when the file yields no rows.
vector<float> process_data(ifstream& file, int& dim) {
    vector<float> inputs;
    size_t firstRowWidth = 0;
    bool haveFirstRow = false;

    CSVRow row;
    while (file >> row) {
        const size_t sizeBefore = inputs.size();
        // skips first column (id)
        for (size_t col = 1; col < row.size(); ++col)
            inputs.push_back(row[col]);

        if (!haveFirstRow) {
            // The first row defines the reported dimensionality.
            firstRowWidth = inputs.size() - sizeBefore;
            haveFirstRow = true;
        }
    }

    // With no rows there is no first row to inspect, so report 0 rather than
    // indexing into an empty container.
    dim = static_cast<int>(firstRowWidth);
    return inputs;
}
// Reads every CSV row from `file` and returns the query values flattened row
// after row into a single vector.
//
// The first and the last column of each row are skipped; every cell in between
// is converted to float through CSVRow::operator[].
//
// Parameters:
//   file - input stream positioned at the first row to read; consumed until a
//          read fails (normally end of file).
//   dim  - output: half (integer division) of the number of values kept from
//          the FIRST row, or 0 when no row could be read. Presumably each
//          query stores two values per dimension - confirm against the caller.
//          Later rows are not checked against it.
//
// Returns the flattened values; empty when the file yields no rows.
vector<float> process_queries(ifstream& file, int& dim) {
    vector<float> inputs;
    size_t firstRowWidth = 0;
    bool haveFirstRow = false;

    CSVRow row;
    while (file >> row) {
        const size_t sizeBefore = inputs.size();
        // skips first/last columns; written as `col + 1 < size` so the bound
        // cannot wrap around if a row ever has no cells
        for (size_t col = 1; col + 1 < row.size(); ++col)
            inputs.push_back(row[col]);

        if (!haveFirstRow) {
            // The first row defines the reported dimensionality.
            firstRowWidth = inputs.size() - sizeBefore;
            haveFirstRow = true;
        }
    }

    // With no rows there is no first row to inspect, so report 0 rather than
    // indexing into an empty container.
    dim = static_cast<int>(firstRowWidth) / 2;
    return inputs;
}