mxlib
c++ tools for analyzing astronomical data and other tasks by Jared R. Males. [git repo]
readColumns.hpp
Go to the documentation of this file.
1 /** \file readColumns.hpp
2  * \author Jared R. Males
3  * \brief A utility to read in columns from a text file.
4  * \ingroup asciiutils
5  */
6 
7 //***********************************************************************//
8 // Copyright 2015, 2016, 2017 Jared R. Males (jaredmales@gmail.com)
9 //
10 // This file is part of mxlib.
11 //
12 // mxlib is free software: you can redistribute it and/or modify
13 // it under the terms of the GNU General Public License as published by
14 // the Free Software Foundation, either version 3 of the License, or
15 // (at your option) any later version.
16 //
17 // mxlib is distributed in the hope that it will be useful,
18 // but WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 // GNU General Public License for more details.
21 //
22 // You should have received a copy of the GNU General Public License
23 // along with mxlib. If not, see <http://www.gnu.org/licenses/>.
24 //***********************************************************************//
25 
26 #ifndef __readColumns_hpp__
27 #define __readColumns_hpp__
28 
29 #include <fstream>
30 #include <string>
31 #include <cstring>
32 #include <iostream>
33 
34 #include "../mxlib.hpp"
35 #include "../mxError.hpp"
36 
37 #include "stringUtils.hpp"
38 
39 #define MX_READCOL_MISSINGVALSTR "-99"
40 
41 namespace mx
42 {
43 namespace ioutils
44 {
45 
/// Terminates the readcol recursion: called when no arrays remain to be filled.
/** Both arguments are ignored, so whatever is left of the line is discarded.
  *
  * \tparam delim is the character separating columns (unused here).
  * \tparam eol is the end of line character (unused here).
  */
template<char delim=' ', char eol='\n'>
void readcol( char * /*sin*/, ///< [in] the unparsed remainder of the line (ignored)
              int /*sz*/      ///< [in] the number of characters remaining (ignored)
            )
{
   //Nothing to do: every requested column has already been stored.
}
54 
55 template<char delim=' ', char eol='\n', typename arrT, typename... arrTs>
56 void readcol(char * sin, int sz, arrT & array, arrTs &... arrays)
57 {
58  //static const unsigned short int nargs = sizeof...(arrTs);
59  std::string str;
60 
61  int i=0;
62  int l = strlen(sin);
63 
64  if(l < 1) return;
65 
66  //Eat white space
67  while( isspace(sin[i]) && sin[i] != eol && i < l) ++i;
68  sin = sin + i;
69  sz = sz -i;
70 
71  //If there's nothing here, we still need to populate the vector
72  if(sz <= 1)
73  {
74  array.push_back(convertFromString<typename arrT::value_type>(""));
75  return;
76  }
77 
78  std::stringstream sinstr(sin);
79 
80  std::getline(sinstr, str, delim);
81 
82  //Last entry in line might contain eol
83  if( str[str.size()-1] == eol)
84  {
85  str.erase(str.size()-1);
86  }
87 
88  if( str.size() == 0 )
89  {
90  array.push_back(convertFromString<typename arrT::value_type>(MX_READCOL_MISSINGVALSTR));
91  }
92  else
93  {
94  array.push_back(convertFromString<typename arrT::value_type>(str));
95  }
96 
97  sin += ( str.size()+1)*sizeof(char);
98  sz -= ( str.size()+1)*sizeof(char);
99 
100  readcol<delim,eol>(sin, sz, arrays...);
101 
102 }
103 
104 
105 
106 ///Read in columns from a text file
107 /** This function opens a file containing data formatted in columns and reads in the data row by row.
108  * The data are stored in std::vectors, which should not be pre-allocated (though they could be reserve()-ed).
109  *
110  * Example:
111  * \code
112  * std::vector<int> i1;
113  * std::vector<float> f1;
114  * std::vector<double> d1;
115  *
116  * readColumns("data_file.txt", i1, f1, d1);
117  * \endcode
118  *
119  * Note that the types of the vectors do not need to be specified as template arguments.
120  *
121  * The format of the file can be specified with template arguments like
122  * \code
123  * readColumns<',', ';', '\r'>("data_file.csv", i1, f1, d1);
124  * \endcode
125  * which sets the delimmiter to comma, the comment character to ;, and the end-of-line to \\r.
126  *
127  * Columns can be skipped using mx::ioutils::skipCol.
128  *
129  * \tparam delim is the character separating columns, by default this is space.
130  * \tparam comment is the character starting a comment. by default this is #
131  * \tparam eol is the end of line character. by default this is \n
132  * \tparam arrTs a variadic list of array types. this is not specified by the user.
133  *
134  * \todo lineSize should be configurable
135  *
136  * \ingroup asciiutils
137  */
138 template<char delim=' ', char comment='#', char eol='\n', typename... arrTs>
139 int readColumns( const std::string & fname, ///< [in] is the file name to read from
140  arrTs &... arrays ///< [out] a variadic list of std::vectors. Any number with mixed value_type can be specified. Neither allocated nor cleared, so repeated calls will append data.
141  )
142 {
143  //open file
144  errno = 0;
145  std::ifstream fin;
146  fin.open(fname);
147 
148  if(!fin.good())
149  {
150  if(errno != 0)
151  {
152  mxPError("readColumns", errno, "Occurred while opening " + fname + " for reading.");
153  }
154  else
155  {
156  mxError("readColumns", MXE_FILEOERR, "Occurred while opening " + fname + " for reading.");
157  }
158  return -1;
159  }
160 
161  int lineSize = 4096;
162  char * line = new char[lineSize];
163 
164  while(fin.good())
165  {
166  //Save one space for adding eol
167  fin.getline(line, lineSize-1, eol);
168 
169  int i=0;
170  int l = strlen(line);
171 
172  if(l <= 0) break;
173 
174  //std::cerr << line << "\n";
175 
176  //Find start of comment and end line at that point.
177  while(line[i] != comment )
178  {
179  ++i;
180  if( i == l ) break;
181  }
182 
183  if(i <= l-1)
184  {
185  line[i] = '\0';
186  }
187 
188  l = strlen(line);
189 
190  if(l == 0) continue;
191 
192  //Make sure line ends with eol
193  line[l] = eol;
194  ++l;
195  line[l] = '\0';
196 
197  readcol<delim,eol>(line, strlen(line), arrays...);
198  }
199 
200  delete[] line;
201 
202  //getline will have set fail if there was no new line on the last line.
203  if(fin.bad() && !fin.fail())
204  {
205  if(errno != 0)
206  {
207  mxPError("readColumns", errno, "Occurred while reading from " + fname + ".");
208  }
209  else
210  {
211  mxError("readColumns", MXE_FILERERR, "Occurred while reading from " + fname + ".");
212  }
213  return -1;
214  }
215 
216  fin.clear(); //Clear the fail bit which may have been set by getline
217  fin.close();
218 
219  if(fin.fail())
220  {
221  if(errno != 0)
222  {
223  mxPError("readColumns", errno, "Occurred while closing " + fname + ".");
224  }
225  else
226  {
227  mxError("readColumns", MXE_FILECERR, "Occurred while closing " + fname + ".");
228  }
229  return -1;
230  }
231 
232 
233 
234  return 0;
235 }
236 
///A dummy class to allow mx::readColumns to skip a column(s) in a file without requiring memory allocation.
/** The alternative is to use dummy vectors, which result in excess memory allocations and deallocations.
  * Usage:
  \code
  std::vector<T> col1, col5;
  skipCol sk;
  readColumns("filename.txt", col1, sk, sk, sk, col5); //This results in only columns 1 and 5 being stored.
  \endcode
  *
  * \ingroup asciiutils
  */
struct skipCol
{
   typedef std::string value_type; ///< value_type is defined as std::string so that no conversions take place.

   /// Accept a value and discard it, nothing is stored.
   /**
     * \tparam T is the type of the value being discarded.
     */
   template<typename T>
   void push_back( const T & arg /**< [in] the value to ignore */)
   {
      //Explicitly mark the argument as unused so that this header compiles without warnings.
      static_cast<void>(arg);
   }
};
258 
259 
260 } //namespace ioutils
261 } //namespace mx
262 
263 #endif //__readColumns_hpp__
int readColumns(const std::string &fname, arrTs &... arrays)
Read in columns from a text file.
The mxlib c++ namespace.
Definition: mxError.hpp:107
Utilities for working with strings.
A dummy class to allow mx::readColumns to skip a column(s) in a file without requiring memory allocat...
std::string value_type
value_type is defined as std::string so that no conversions take place.