Bo Peng wrote:
I have a ready-to-use class that parses CSV files. Is there any interest
in integrating this into LyX?
I am interested. Entering results into LyX tables has been painful. It would be good
if it could also import tab/space separated files.
Yes, this class accepts any kind of delimiter... attached.
I don't know anything about tables and longtables; is there anybody
willing to do the proper tabular creation?
Abdel.
/////////////////////////////////////////////////////////////////////////////////
/// \file csv.cpp
/// Implementation file of the \c csv class.
///
/// \date Feb, 2003
/// \author Abdelrazak Younes
///
///
// For forward declaration use
// #include <iosfwd>
// For file positioning use
// #include <ios>
#include "csv.hpp"
#include "utils.hpp"
using namespace std;
namespace pegasusalgo {
//////////////////////////////////////////////////////
//////////////////////////////////////////////////////
/// Constructs a csv object and immediately initialises it from a file.
///
/// Every scalar member receives a defined value before init() runs, so that
/// size() and the line checks do not read indeterminate values when init()
/// returns early. The status returned by init() is not propagated.
///
/// \param ifname     CSV file to read.
/// \param KeyName    CSV file format; when empty init() tries to detect it.
/// \param b_readAll  when true the whole file is read during construction.
csv::csv(const string ifname,
         const string KeyName,
         const bool b_readAll)
    : invalidFlagPosition(-1000),   // same "no flag" value verify_header() uses
      ReadAll(b_readAll),
      Separator(','),
      DataSize(0),
      currentLinePosition(0)
{
    init(ifname, KeyName, b_readAll);
}
/// Inits the csv object.
bool csv::init(
const string ifname, ///< CSV file to read.
const string KeyName, ///< CSV file format.
const bool b_readAll ///< read all the file contents is true !
)
{
ReadAll = b_readAll;
if (ifname.empty())
return false;
if (KeyName.empty()) // will initialise data type only if
format is recognized
{
if ( _detect_file_format(ifname) ) // Detect file
format and put it in Type member !
{
init_type(Type);
}
}
else
{
init_type(KeyName);
}
if (!init_io(ifname))
return false;
if (ReadAll)
{
readall();
}
return true;
}
//////////////////////////////////////////////////////
/// Returns the number of samples, as recorded in the DataSize member.
size_t csv::size()
{
    const size_t sampleCount = DataSize;
    return sampleCount;
}
/// This member function initialises the csv object with the csv description
/// file whose name is passed as an argument.
/*
void csv::init(const string CSVdescriptionFile)
{
parameters = loadDescriptionFile(CSVdescriptionFile);
_check_parameters();
}
*/
///. This member function initialise the csv object with the csv file
description
/// which name is passed as an argument.
void csv::init_type(const string KeyName)
{
Type = KeyName;
string DescriptionFileName = "./dynamics/" + KeyName + ".dsc";
parameters = loadDescriptionFile( DescriptionFileName );
_check_parameters();
}
////////////////////////////////////////////////////////
///.
void csv::_check_parameters()
{
if ( parameters.empty() )
{
cerr << "WARNING: the CSVdescriptionFile variable is
empty" << endl
<< "\tThe program will extract all the Data" <<
endl;
return;
}
map< string, vector<string> >::iterator I;
// First check CVS file format (Type)
if ( (I = parameters.find("TYPE")) != parameters.end() )
{
if ( I->second[0] != Type )
{
string toto=I->second[0];
cerr << "WARNING: the CSV description File is
of " << I->second[0] << " type, "
<< "instead of " << Type << endl
<< "\tThe program will extract all the
Data" << endl;
Type = "";
return;
}
I->second.clear();
parameters.erase(I);
}
// Second check if there is a VALID field in the CVS file format
if ( (I = parameters.find("VALID_FLAG")) != parameters.end() )
{
if ( I->second.empty() )
invalidFlag = "";
else {
invalidFlag = I->second[0];
invalidFlagValue = I->second[1];
}
I->second.clear();
parameters.erase(I);
}
// Preparation for extraction: Concatenates the field into one
vector of string.
NeededParams = concat(parameters);
paramPositions.resize(NeededParams.size());
}
///////////////////////////////////////////////////////////////////////
/// Verify the presence of all needed parameters
///
void csv::verify_header()
{
if (Type.empty())
{
NeededParams = PresentParams;
paramPositions.resize(NeededParams.size());
return;
}
size_t nbrParams = NeededParams.size();
if (!invalidFlag.empty())
{
invalidFlagPosition = distance(
PresentParams.begin(),
find(PresentParams.begin(),
PresentParams.end(), invalidFlag) );
}
else
{
invalidFlagPosition = -1000;
}
for (size_t i=0; i!=nbrParams; ++i)
{
paramPositions[i] = distance(
PresentParams.begin(),
find(PresentParams.begin(),
PresentParams.end(), NeededParams[i]) );
if (paramPositions[i] == PresentParams.size())
{
string MissingPar = NeededParams[i];
cerr << "ERROR: The input parameter '"
<< NeededParams[i]
<< "' is not present in the input Data
file"
<< "\nPlease verify" << endl;
exit(11);
}
}
}
///////////////////////////////////////////////////////////////////////
void csv::save(const string ofname) {
ofstream output;
string outputfile, base(ofname);
base = '_'+ base;
// vector<string> temp;
for (csv_data::iterator K=Data.begin(); K!=Data.end(); ++K) {
outputfile = K->first + base;
output.open(outputfile.c_str());
// temp = reverse_copy(K->second);
// output << temp;
output << K->second;
// temp.clear();
output.close(); }
}
///////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////
/// Opens the CSV file on the member stream and processes its header.
///
/// On success the header is read and verified and currentLinePosition is
/// reset to 0. On failure an error is printed.
///
/// \param ifname  CSV file to read.
/// \return true when the file could be opened, false otherwise.
bool csv::init_io(const string ifname)
{
    input.open(ifname.c_str());
    const bool opened = input.is_open();
    if (opened)
    {
        printMessage("Extracting data from csv file: " + ifname + " ");
        read_header();
        verify_header();
        currentLinePosition = 0;
    }
    else
    {
        cerr << "ERROR: cannot open file "
             << ifname << endl;
    }
    return opened;
}
///////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////
/// Reads the CSV file header.
///
/// The first line of the member stream is used to detect the separator with
/// DetectSeparator() and is then split into PresentParams by ExtractParams().
void csv::read_header()
{
    // Extraction of the first line straight into a string: no heap buffer
    // to release and no limit on the length of the header.
    string Line;
    std::getline(input, Line);
    Separator = DetectSeparator(Line);
    cout << "************\nSeparator is " << Separator << endl;
    PresentParams = ExtractParams(Line, Separator);
}
///////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////
void csv::readall()
{
printMessage("Extracting all data csv file");
// Initialisation of the map with the NeededParams Entries
vector<string> dummy; // Empty dummy vector
size_t i;
for (i=0; i!=NeededParams.size();++i) {
Data[NeededParams[i]] = dummy;
Data[NeededParams[i]].reserve(10000); }
vector<string> extractedParams;
while (!input.eof())
{
readline();
push_back();
}
input.close();
DataSize = Data[NeededParams[0]].size();
cout << "[OK]" << endl;
cout << "\tNumber of samples: " << DataSize << endl;
cout << "@" << endl;
}
///////////////////////////////////////////////////////////////////////
void csv::read(const string ifname)
{
ifstream input(ifname.c_str());
if (!input.is_open()) {
cerr << "ERROR: cannot open file "
<< ifname << endl;
exit(10);
}
printMessage("Extracting data from csv file: " + ifname + " ");
// extraction of the first line to a list of strings
char * Line_str = new char[10000];
string Line;
input.getline(Line_str,10000);// input.get();
Line = Line_str;
Separator = DetectSeparator(Line);
// cout << "************\nSeparator is " << Separator << endl;
PresentParams = ExtractParams(Line,Separator);
vector<string> dummy; // Empty dummy vector
size_t pos, i, N = PresentParams.size();
for (i=0; i!=N-1; ++i)
Data[PresentParams[i]] = dummy;
string STR;
while(!input.eof())
{
for (i=0; i!=N-1; ++i)
{
std::getline(input, STR, Separator);
if (input.fail()) break;
//if (input.state == failbit) break;
Data[PresentParams[i]].push_back(STR); //
Insert STR in the (*I) pointed list
}
if (input.fail()) break;
// Special case for last element
// get what remains in the current line
std::getline(input,STR);
// First eliminate Carriage Return Char (code 13) if
present
pos = STR.find(13); if (pos<STR.size())
STR.erase(STR.begin()+pos);
// Then eliminate delimiter if present and what follows
the delimiter
pos = STR.find(Separator); if (pos<STR.size())
STR.erase(pos,STR.size());
// Then insert STR in the current vector
Data[PresentParams[N-1]].push_back(STR);
}
input.close();
DataSize = Data[PresentParams[0]].size();
cout << "[OK]" << endl;
cout << "\tNeededParams\n" << NeededParams << endl;
cout << "\tPresentParams\n" << PresentParams << endl;
cout << "\tNumber of samples: " << DataSize << endl;
cout << "@" << endl;
}
////////////////////////////////////////////////////////
/// Checks currentLine with regards to PresentParams and the invalid flag.
///
/// \return the check status.
/// \retval true   the line is OK.
/// \retval false  the line is empty, does not have as many columns as the
///                header, or carries the invalid flag value.
bool csv::check_current_line()
{
    if (currentLine.empty())
        return false;

    if (currentLine.size() != PresentParams.size())
    {
        cerr << "WARNING: Incorrect number of colums at line "
             << currentLinePosition << endl
             << " PresentParams.size() " << PresentParams.size()
             << " currentLine.size() " << currentLine.size()
             << " PresentParams " << PresentParams
             << " currentLine " << currentLine
             << "\tSkipping line..." << endl;
        return false;
    }

    // A negative position (the -1000 value set by verify_header()) means
    // "no flag"; a position beyond the line is ignored instead of being
    // read out of bounds.
    if (invalidFlagPosition >= 0
        && static_cast<size_t>(invalidFlagPosition) < currentLine.size()
        && currentLine[invalidFlagPosition] == invalidFlagValue)
    {
        return false;
    }
    return true;
}
////////////////////////////////////////////////////////
/// Checks a CSV line with regards to PresentParams and the invalid flag.
///
/// \param V  the fields of the line to check.
/// \return the check status.
/// \retval true   the line is OK.
/// \retval false  the line is empty, does not have as many columns as the
///                header, or carries the invalid flag value.
bool csv::check_line(vector<string> &V)
{
    if (V.empty())
        return false;

    if (V.size() != PresentParams.size())
    {
        // The number of lines stored so far is used as line number.
        const size_t lineNbr =
            NeededParams.empty() ? 0 : Data[NeededParams[0]].size();
        cerr << "WARNING: Incorrect number of colums at line "
             << lineNbr << endl
             << "\tSkipping line..." << endl;
        return false;
    }

    // A negative position (the -1000 value set by verify_header()) means
    // "no flag"; a position beyond the line is ignored instead of being
    // read out of bounds.
    if (invalidFlagPosition >= 0
        && static_cast<size_t>(invalidFlagPosition) < V.size()
        && V[invalidFlagPosition] == invalidFlagValue)
    {
        // Lines carrying the invalid flag are skipped silently.
        return false;
    }
    return true;
}
////////////////////////////////////////////////////////
/// Stores the needed fields of currentLine.
///
/// Nothing is stored when check_current_line() rejects the line. In "read
/// all" mode each needed field is appended to its column of Data; otherwise
/// LineVector is rebuilt with the needed fields of the current line only.
void csv::push_back()
{
    if (!check_current_line())
        return;

    const size_t N = NeededParams.size();
    if (ReadAll)
    {
        for (size_t i = 0; i < N; ++i)
            Data[NeededParams[i]].push_back(currentLine[paramPositions[i]]);
    }
    else
    {
        LineVector.clear();
        for (size_t i = 0; i < N; ++i)
        {
            // For a map do:
            //   LineMap[ NeededParams[i] ] = currentLine[ paramPositions[i] ];
            // For a vector do:
            LineVector.push_back(currentLine[paramPositions[i]]);
        }
    }
}
////////////////////////////////////////////////////////
/// Reads the next line of the file and stores it with push_back().
///
/// \return the status of readline(): true when a line was read (whether or
///         not push_back() kept it), false otherwise.
bool csv::getline()
{
    const bool lineRead = readline();
    if (lineRead)
        push_back();
    return lineRead;
}
/*
map<string,string> L;
if ( check_current_line() )
{
size_t i, N=NeededParams.size();
for (i=0; i<N;++i)
{
L[ NeededParams[i] ] = currentLine[ paramPositions[i] ] ;
}
}
return L;
}
*/
/// Reads the next line of the file and returns its needed fields in \p V.
///
/// \param V  receives the needed fields of the line; it is left empty when
///           no line could be read or when check_current_line() rejects it.
/// \return true when a line was read, false otherwise (same convention as
///         the getline() overload without argument).
bool csv::getline(vector<string>& V)
{
    V.clear();

    // The fields can only be extracted after a successful read.
    if (!readline())
        return false;

    if (check_current_line())
    {
        const size_t N = NeededParams.size();
        V.reserve(N);
        for (size_t i = 0; i < N; ++i)
            V.push_back(currentLine[paramPositions[i]]);
    }
    return true;
}
////////////////////////////////////////////////////////
/// Stores the needed fields of the line \p V into Data.
///
/// Nothing is stored when check_line() rejects the line.
///
/// \param V  the fields of one CSV line, in header order.
void csv::push_back(vector<string> &V)
{
    if (!check_line(V))
        return;

    const size_t count = NeededParams.size();
    for (size_t k = 0; k != count; ++k)
    {
        const size_t column = paramPositions[k];
        Data[NeededParams[k]].push_back(V[column]);
    }
    /*
    cout << "NeededParams.size() " << N << endl;
    cout << V << endl;
    cout << "V.size() " << V.size() << endl;
    */
}
///////////////////////////////////////////////////////////////////////
/// Reads the next line of the csv file.
///
/// The fields of the line are stored in currentLine (split on Separator by
/// ExtractParams()) and currentLinePosition is incremented.
///
/// \return true when a non-empty line was read; false when the line is empty
///         or nothing could be read, in which case currentLine is empty.
bool csv::readline()
{
    currentLine.clear();

    // Read into a string: no heap buffer allocated (and lost) on every call.
    string Line;
    std::getline(input, Line);

    // fail() is only raised when nothing could be extracted, so a last line
    // without a trailing newline is still returned although it reaches the
    // end of the file.
    if (input.fail() || Line.empty())
        return false;

    currentLine.reserve(paramPositions.size() + 10);
    ExtractParams(currentLine, Line, Separator);
    currentLinePosition++;
    return true;
}
///////////////////////////////////////////////////////////////////////
void csv::read(const string ifname, const string KeyName)
{
ifstream input(ifname.c_str());
if (!input.is_open()) {
cerr << "ERROR: cannot open file"
<< ifname << endl;
exit(10);
}
printMessage("extracting data from " + KeyName + " csv file: "
+ ifname + " ");
// extraction of the first line to a list of strings
char * Line_str = new char[10000];
string Line;
input.getline(Line_str,10000);// input.get();
Line = Line_str;
Separator = DetectSeparator(Line);
PresentParams = ExtractParams(Line,Separator);
//VerifyFirstLine(PresentParams,NeededParams);
verify_header();
// Initialisation of the map with the NeededParams Entries
vector<string> dummy; // Empty dummy vector
size_t i;
for (i=0; i!=NeededParams.size();++i)
{
Data[NeededParams[i]] = dummy;
Data[NeededParams[i]].reserve(10000);
}
vector<string> extractedParams;
while (!input.eof())
{
bool tooto = input.getline(Line_str,10000).fail();//
input.get();
Line = Line_str;
if ( Line.empty() ) continue;
extractedParams = ExtractParams(Line,Separator);
push_back(extractedParams);
}
input.close();
DataSize = Data[NeededParams[0]].size();
cout << "[OK]" << endl;
cout << "\tNumber of samples: " << DataSize << endl;
cout << "@" << endl;
}
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
/// Verifies the presence of a field in the CSV description and exits on
/// failure.
///
/// \param Param  name of the field looked up in \c parameters.
/// \param Size   number of elements the field must hold.
/// \return iterator on the entry of \c parameters for \p Param.
/// Terminates the program with exit(11) when the field is missing or does
/// not hold exactly \p Size elements.
map< string, vector<string> >::iterator
csv::check_param_description(const string Param,
                             const size_t Size)
{
    // First check Param presence in the CSV file format description.
    const map< string, vector<string> >::iterator I = parameters.find(Param);
    if (I == parameters.end())
    {
        cerr << "ERROR: cannot find " << Param << " field in "
             << Type << " CSV description File." << endl;
        exit(11);
    }

    if (I->second.size() != Size)
    {
        // Name the field that was actually checked.
        cerr << "ERROR: invalid '" << Param << "' field in the " << Type
             << " CSV description File, " << Size
             << " elements are needed!" << endl;
        exit(11);
    }
    return I;
}
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
/// Verifies the presence of a field in the CSV description.
///
/// \param Param  name of the field looked up in \c parameters.
/// \param Size   number of elements the field must hold.
/// \param I      receives the result of the lookup (parameters.end() when
///               the field is missing).
/// \return true when the field exists and holds exactly \p Size elements.
bool csv::check_param_description(const string Param,
                                  const size_t Size,
                                  map< string, vector<string> >::iterator & I)
{
    I = parameters.find(Param);
    return I != parameters.end() && I->second.size() == Size;
}
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
bool csv::check_format(const string field,
const size_t paramNbr,
vector<size_t>& indexVector)
{
map< string, vector<string> >::iterator I;
// First check Param presence CVS file format description
if (!check_param_description(field, paramNbr, I))
{
cerr << "ERROR: Problem with " << field << " field in "
<< Type << " CSV description File." << endl;
return false;
}
vector<string>::iterator it_found;
for (size_t i=0; i<paramNbr; ++i)
{
it_found = std::find(NeededParams.begin(),
NeededParams.end(), I->second[i]);
if (it_found == NeededParams.end())
{
cerr << "ERROR: Cannot find " << I->second[i]
<< " parameter in "
<< Type << " CSV description File." <<
endl;
return false;
}
indexVector.push_back(std::distance(NeededParams.begin(), it_found));
}
return true;
}
//////////////////////////////////////////////////////////////////////
///. Detects the Format of a CSV file and returns
/// "WINGPSALL" for a WINGPSALL output file
/// "CONVERTOR" for a '.1Hz' output file from CONVERTOR
/// "GEOGENIUS"
/// "CARRIERPHASE"
/// "PROCEDURE"
/// "AIRCRAFT" for aircraft data (dynamics and meteo)
/// "UNKNOWN" if unknown ;)
///
bool csv::_detect_file_format(const string ifname)
{
ifstream input(ifname.c_str());
if (!input.is_open()) {
cerr << "ERROR: cannot open file "
<< ifname << endl;
exit(11);
}
// extraction of the first line to a list of strings
char * Line_str = new char[10000];
input.getline(Line_str,10000);// input.get();
input.close();
string Line = Line_str;
string::size_type pos;
char C;
if (Line[0] == '"')
pos = 1;
else
pos = 0;
C = Line[pos];
switch(C) {
case 'S': // For "Sample"
pos = Line.find('W');
if (Line[pos-1] == '-') // It's a WINGPSALL csv
output file
Type="WINGPSALL";
else
Type="CONVERTOR"; // It's a .1Hz output file
from CONVERTOR
break;
case 'G':
if (Line[pos+1] == 'P') // For GPS_Week
Type="GEOGENIUS";
else // For GWeek
Type="CARRIERPHASE";
break;
case 'W': // For WP_LAT (Way Point)
Type="PROCEDURE";
break;
default:
Type="UNKNOWN";
return false;
}
return true;
}
}; // pegasusalgo
/*
string STR;
size_t N=PresentParams.size();
while(!input.eof())
{
for (i=0; i<N-1;++i)
{
if (input.fail()) break;
if ( Data.find(PresentParams[i]) != Data.end() )
Data[PresentParams[i]].push_back(STR); // Insert STR in the
current vector
}
if (input.fail()) break;
// Special case for last element
// get what remains in the current line
getline(input,STR);
if (Data.find(PresentParams[N-1])!=Data.end())
{
// First eliminate Carriage Return Char
(code 13) if present
pos = STR.find(13); if (pos<STR.size())
STR.erase(STR.begin()+pos);
// Then eliminate delimiter if present
and what follows the delimiter
pos = STR.find(Separator); if
(pos<STR.size()) STR.erase(pos,STR.size());
// Then insert STR in the current
vector
Data[PresentParams[N-1]].push_back(STR);
}
}
*/
/////////////////////////////////////////////////////////////////////////////////
/// \file csv.hpp
/// Definition header of the @c csv class.
///
/// \date Feb, 2003
/// \author Abdelrazak Younes
///
///
#ifndef _CSV
#define _CSV
#include "portability.hpp"
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <map>
#include <valarray>
#include <functional>
#include <algorithm>
#include <numeric>
#include <utility>
#include <cstdlib>
#include <ctime>
#include <cmath>
#include <cstring>
using namespace std;
namespace pegasusalgo {
/// CSV Valid Flag Parameter name and value.
/// In this structure are defined the names of the useful parameters in a
/// CSV input data file.
/// \todo convert this to a vector of string and an enum to get rid of the
/// need of the CSV description file.
struct timed_csv_params {
    char VALID[10];
    char INVALID_FLAG[10];
    char Week[10];
    char Second[10];
    char TYPE[20];
};
/// csv file reader.
/// A csv object reads any file with a "comma separated value" format.
/// The read values are stored in a map whose keys are defined in the
/// file header and the data are stored in vectors of strings.
///
class csv
{
    typedef std::map<std::string, std::vector<std::string> > csv_data;

public:
    /// csv default destructor.
    ~csv() {}

    /// csv default constructor.
    /// Gives every scalar member a defined value; no file is opened.
    csv()
        : invalidFlagPosition(-1000),
          ReadAll(true),
          Separator(','),
          DataSize(0),
          currentLinePosition(0)
    {}

    /// csv 3rd constructor with initialisation and data extraction.
    csv(
        const std::string ifname,        ///< CSV file to read.
        const std::string KeyName = "",  ///< CSV file format.
        const bool b_readAll = true      ///< read all the file contents if true !
    );

    /// Automatic csv format detector.
    /// \return status of detection
    /// \retval true if detection is successful
    /// \retval false if detection is unsuccessful
    bool _detect_file_format(
        const std::string ifname  ///< CSV file to read.
    );

    /// Opens the CSV file and reads its header.
    bool init_io(
        const std::string ifname  ///< CSV file to read.
    );

    /// Inits the csv object.
    bool init(
        const std::string ifname,        ///< CSV file to read.
        const std::string KeyName = "",  ///< CSV file format.
        const bool b_readAll = true      ///< read all the file contents if true !
    );

    /// Inits the csv object from the description of a given format.
    void init_type(
        const std::string KeyName  ///< CSV file Key Name
    );

    /// Checks the parameters defined in the CSV description file.
    void _check_parameters();

    /// Checks current CSV line with regards to PresentParams and the
    /// invalidFlag.
    /// \return the check status.
    /// \retval true the line is OK.
    /// \retval false the line will be skipped.
    bool check_current_line();

    /// Checks a CSV line with regards to PresentParams and the invalidFlag.
    /// \return the check status.
    /// \retval true the line is OK.
    /// \retval false the line will be skipped.
    bool check_line(std::vector<std::string> &V);

    /// Stores the needed fields of the line \p V in Data.
    void push_back(std::vector<std::string> &V);

    /// Reads the next line and stores it with push_back().
    bool getline();

    /// Reads the next line and returns its needed fields in \p V.
    bool getline(std::vector<std::string> &V);

    /// Stores the needed fields of currentLine.
    void push_back();

    /// Reads all csv content in one shot.
    ///
    void readall();

    /// Reads next line of csv file.
    /// The read data will be stored in currentLine variable.
    ///
    bool readline();

    /// Reads a CSV file with known format.
    void read(
        const std::string ifname,  ///< CSV file to read.
        const std::string KeyName  ///< CSV type.
    );

    /// Reads a CSV file with unknown format.
    void read(
        const std::string ifname  ///< CSV file to read.
    );

    /// Saves Data in multiple files.
    void save(
        const std::string ofname  ///< Common ending of the output file names
    );

    /// Type of the CSV file as described in the CSV description file.
    std::string Type;

    /// Name of the column holding the validity flag (empty when unused).
    std::string invalidFlag;

    /// Value of the validity flag that marks a line as invalid.
    std::string invalidFlagValue;

    /// Position of the validity flag in the header; -1000 when unused.
    int invalidFlagPosition;

    /// "Read All" flag.
    /// This boolean is set if all the file is read.
    bool ReadAll;

    /// Autodetected CSV file separator.
    /// This is initialised in the read(...) function with the help of the
    /// DetectSeparator function brought by utils.hpp
    char Separator;

    /// Size of the Data map.
    /// This was introduced in order to avoid calling size() function of one
    /// of the Data values
    size_t DataSize;

    /// Returns the size of the Data.
    /// It in fact returns DataSize
    size_t size();

    /// Needed Parameters.
    /// This is either filled up from the CSV file descriptor or provided by
    /// the user.
    /// \todo Make more use of this for \c fp class instead of reading
    /// CSVdescription file.
    std::vector<std::string> NeededParams;

    /// Position in the header of each entry of NeededParams.
    std::vector<size_t> paramPositions;

    /// Vector of parameters actually present in the CSV file.
    /// This vector of Present parameters will be compared with the
    /// NeededParams vector
    std::vector<std::string> PresentParams;

    /// Current Line Contents.
    ///
    std::vector<std::string> currentLine;

    /// Number of lines read so far by readline().
    int currentLinePosition;

    /// map that will store the CSV description parameters.
    //map<string, vector<string> > parameters;
    csv_data parameters;

    /// Current Line stored in a Vector.
    /// Only the needed Params are stored.
    std::vector<std::string> LineVector;

    /// Current Line stored in a Map.
    /// The Keys are the needed Params.
    std::map<std::string,std::string> LineMap;

    /// map that will store all needed data.
    /// This map will contain all data extracted from a csv file whose
    /// header is included in the NeededParams vector. Unless the format has
    /// not been specified or autodetected, in which case all the data will
    /// be extracted and stored in the map Data.
    //map<string, vector<string> > Data;
    csv_data Data;

    /// Read the CSV file header.
    ///
    void read_header();

    /// Verifies the first line of the CSV file.
    void verify_header();

    /// Verifies the presence of a field
    bool check_param_description(
        const std::string Param,
        const size_t Size,
        std::map< std::string, std::vector<std::string> >::iterator & I
    );

    /// Verifies the presence of a field and exit if failed.
    std::map< std::string, std::vector<std::string> >::iterator
    check_param_description(
        const std::string Param,
        const size_t Size
    );

    /// Resolves the elements of a description field to indexes in
    /// NeededParams.
    bool check_format(
        const std::string field,
        const size_t paramNbr,
        std::vector<size_t>& indexVector
    );

    /// Eliminates special useless characters.
    /// '"' and space characters are eliminated.
    // inline void EliminateSpecialChar(
    //     string& Var  ///< the string to cure
    // );

    /// Extracts a given Key from a given line.
    /// \return status of extraction
    /// \retval true if extraction is successful
    /// \retval false if extraction is unsuccessful
    // bool ExtractKey(
    //     string& Line,  ///< The string containing the Key string
    //     string& Key    ///< The string
    // );

    /// Extracts the parameters in vector form.
    /// \return vector of parameters (in string form).
    // vector<string> ExtractParams(string& Line, const char delim);

protected:
    /// File stream
    std::ifstream input;
}; // of class csv
}; // of pegasusalgo
#endif // _CSV