I am working with comma separated value (.csv) files which contain information that is used in later calculations. I have been storing the data in two dimensional vectors with success but I feel that my code is somewhat clunky. I am looking for improvements to my approach, either changes to my current approach or suggestions for a better approach all together.
void FileData::setDataVector (string file){
//Opens .csv file and checks that it can be read.
std::ifstream infile;
std::string inf=file+".csv";
infile.open(inf.c_str(),ios::in);
if (!infile.is_open()){
cout << "Unable to open "+inf << endl;
exit(1);
}
//Sets the size of the vector
int i=0;
while (infile.good()){
getline(infile,inf);
if (inf.size()!=0){
i++;
}
}
dataVector.resize(i);
infile.clear();
infile.seekg(0, ios::beg);
//Writes each line to the two dimensional vector
std::string line;
int start,stop;
for (int n=0; infile.good(); n++){
getline(infile,line);
if ((line.size())==0) {
return;
}
start=line.find_first_not_of(", ");
stop=line.find_first_of(", ",start);
//Fills out the columns of the current row of dataVector
while(start!=-1) {
dataVector[n].push_back(line.substr(start,stop-start));
start=line.find_first_not_of(", ",stop);
stop=line.find_first_of(", ",start);
}
}
infile.close();
}
I'm assuming dataVector is a vector<vector<string> > member in your class. I'm also assuming that the csv data in the file will look something like:
1 2
ABC,123,DEF,456,GHI,789
987,IHG,654,FED,321,CBA
I think that getting the number of lines in the file just to resize the vector in dataVector is potentially wasteful. Why not just read them directly from the file into dataVector as you encounter them? vectors are dynamic and thus perfect for this. Perhaps something like this?
#include <fstream>
#include <string>
#include <vector>
usingnamespace std;
vector<vector<string> > dataVector;
int main()
{
ifstream in("test.csv"); // open file
string buf; // buffer for getline
while(getline(in,buf)) // read lines in file till error or eof
{
vector<string> tmp; // temp vector to tokenize csv lines
size_t pos = buf.find(","); // find first delimiter
while(!buf.empty()) // we will cannibalize the string as we tokenize it
{
tmp.push_back(buf.substr(0,pos)); // get string before delimiter
buf.erase(0,pos+1); // delete to delimiter
pos = buf.find(","); // find next delimiter
if(pos == string::npos) // no more delimiters
{
tmp.push_back(buf);
buf.clear(); // empty out string
}
}
dataVector.push_back(tmp);
}
return 0;
}
void setVector (){
//Opens .csv file and checks that it can be read.
std::ifstream infile(("file.csv").c_str()); //the file name must be a c-str
std::string buf;
if (!infile.is_open()){
cout << "Unable to open +"file.csv" << endl;
exit(1);
}
std::vector<std::string> tmp; //Declaring temp vector and pos as an int
size_t pos;
getline(infile,buf); //Skips the label line
while(getline(infile,buf)){
pos=buf.find(',');
if (pos==string::npos) continue; //Skips blank lines
while(!buf.empty()){
tmp.push_back(buf.substr(0,pos));
buf.erase(0,pos+1);
pos = buf.find(',');
if (pos==string::npos){
tmp.push_back(buf);
buf.clear();
}
}
dataVector.push_back(tmp);
tmp.clear(); //Resets tmp vector to be used again
}
infile.close();
}