mxlib
c++ tools for analyzing astronomical data and other tasks by Jared R. Males. [git repo]
Loading...
Searching...
No Matches
readColumns.hpp
Go to the documentation of this file.
1/** \file readColumns.hpp
2 * \author Jared R. Males
3 * \brief A utility to read in columns from a text file.
4 * \ingroup asciiutils
5 */
6
7//***********************************************************************//
8// Copyright 2015, 2016, 2017 Jared R. Males (jaredmales@gmail.com)
9//
10// This file is part of mxlib.
11//
12// mxlib is free software: you can redistribute it and/or modify
13// it under the terms of the GNU General Public License as published by
14// the Free Software Foundation, either version 3 of the License, or
15// (at your option) any later version.
16//
17// mxlib is distributed in the hope that it will be useful,
18// but WITHOUT ANY WARRANTY; without even the implied warranty of
19// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20// GNU General Public License for more details.
21//
22// You should have received a copy of the GNU General Public License
23// along with mxlib. If not, see <http://www.gnu.org/licenses/>.
24//***********************************************************************//
25
26#ifndef __readColumns_hpp__
27#define __readColumns_hpp__
28
29#include <cstring>
30#include <string>
31#include <fstream>
32#include <format>
33
34#include "../mxlib.hpp"
35
36#include "stringUtils.hpp"
37
38#define MX_READCOL_MISSINGVALSTR "-99"
39
40namespace mx
41{
42namespace ioutils
43{
44
45struct readColSpaceDelim
46{
47 static constexpr char delim = ' ';
48 static constexpr char strDelim = '"';
49 static constexpr char eol = '\n';
50 static constexpr char comment = '#';
51 static constexpr const char *missingValStr = MX_READCOL_MISSINGVALSTR;
52};
53
54struct readColCommaDelim
55{
56 static constexpr char delim = ',';
57 static constexpr char strdelim = '"';
58 static constexpr char eol = '\n';
59 static constexpr char comment = '#';
60 static constexpr const char *missingValStr = MX_READCOL_MISSINGVALSTR;
61};
62
63template <class delimT, class verboseT>
64error_t readcol( [[maybe_unused]] const char *sin, [[maybe_unused]] int sz, [[maybe_unused]] int & colno )
65{
66 return error_t::noerror;
67}
68
/// Parse the next column from a line of text, append it to the first array, then recurse on the rest.
/** Leading white space (other than delimT::eol) is skipped. The text up to the next delimT::delim, or
 * to the end of the string, is converted with convertFromString and appended to \p array with push_back.
 *
 * - An empty field is stored as the conversion of MX_READCOL_MISSINGVALSTR.
 * - If only white space remains, the conversion of "" is appended and processing of this line stops.
 * - If nothing at all remains, nothing is appended and processing of this line stops.
 *
 * \returns error_t::noerror on success
 * \returns the value returned by internal::mxlib_error_report if an exception is caught while converting
 *          or storing the value. In that case \p colno still holds the 0-based index of the failing column.
 *
 * \tparam delimT supplies the static members delim and eol
 * \tparam verboseT is passed through to internal::mxlib_error_report
 * \tparam arrT type of the array receiving this column. Must provide value_type and push_back.
 * \tparam arrTs types of the arrays receiving the remaining columns
 */
template <class delimT, class verboseT, typename arrT, typename... arrTs>
error_t readcol( const char *sin,   ///< [in] NUL-terminated text remaining on the current line
                 int sz,            ///< [in] number of characters remaining in \p sin
                 int &colno,        ///< [in,out] 0-based index of the column being parsed, incremented on success
                 arrT &array,       ///< [out] receives the value parsed from this column
                 arrTs &...arrays   ///< [out] receive the values of the following columns
)
{
    try
    {
        // Check the remaining size before touching sin, so that a line with fewer columns
        // than arrays never causes a read past the end of the buffer.
        if( sin == nullptr || sz < 1 )
        {
            return error_t::noerror;
        }

        int l = static_cast<int>( std::strlen( sin ) );

        if( l < 1 )
        {
            return error_t::noerror;
        }

        // Eat white space. The bound is tested first, and the character is converted to
        // unsigned char because isspace is undefined for negative values.
        int i = 0;
        while( i < l && sin[i] != delimT::eol && isspace( static_cast<unsigned char>( sin[i] ) ) )
        {
            ++i;
        }
        sin += i;
        sz -= i;
        l -= i;

        // If there's nothing here, we still need to populate the vector.
        // Note that a single remaining character is a valid field, hence < 1 and not <= 1.
        if( sz < 1 )
        {
            array.push_back( convertFromString<typename arrT::value_type>( "" ) );
            return error_t::noerror;
        }

        // The field runs up to the next delimiter, or to the end of the string if there is none.
        const char *delimPos = static_cast<const char *>( std::memchr( sin, delimT::delim, l ) );
        const int fieldLen = ( delimPos != nullptr ) ? static_cast<int>( delimPos - sin ) : l;

        std::string str( sin, static_cast<size_t>( fieldLen ) );

        // Last entry in line might contain eol
        if( !str.empty() && str.back() == delimT::eol )
        {
            str.pop_back();
        }

        if( str.empty() )
        {
            array.push_back( convertFromString<typename arrT::value_type>( MX_READCOL_MISSINGVALSTR ) );
        }
        else
        {
            array.push_back( convertFromString<typename arrT::value_type>( str ) );
        }

        // Step over the delimiter only if one was actually found, so that sin never moves
        // beyond the terminating NUL and sz stays equal to the number of characters left.
        const int consumed = fieldLen + ( ( delimPos != nullptr ) ? 1 : 0 );
        sin += consumed;
        sz -= consumed;
    }
    catch( const std::invalid_argument &e )
    {
        return internal::mxlib_error_report<verboseT>( error_t::std_invalid_argument,
                                                       std::format( "processing column {}: {}", colno, e.what() ) );
    }
    catch( const std::out_of_range &e )
    {
        return internal::mxlib_error_report<verboseT>( error_t::std_out_of_range,
                                                       std::format( "processing column {}: {}", colno, e.what() ) );
    }
    catch( const std::exception &e )
    {
        return internal::mxlib_error_report<verboseT>( error_t::exception,
                                                       std::format( "processing column {}: {}", colno, e.what() ) );
    }
    catch( ... )
    {
        return internal::mxlib_error_report<verboseT>( error_t::exception,
                                                       std::format( "processing column {}.", colno ) );
    }

    ++colno;
    return readcol<delimT, verboseT>( sin, sz, colno, arrays... );
}
146
/// Read in columns from a text file
/** This function opens a file containing data formatted in columns and reads in the data row by row.
 * The data are stored in std::vectors, which should not be pre-allocated (though they could be reserve()-ed).
 *
 * Example:
 * \code
 * std::vector<int> i1;
 * std::vector<float> f1;
 * std::vector<double> d1;
 *
 * readColumns("data_file.txt", i1, f1, d1);
 * \endcode
 *
 * Note that the types of the vectors do not need to be specified as template arguments.
 *
 * The format of the file is specified with the first template argument, e.g.
 * \code
 * readColumns<readColCommaDelim>("data_file.csv", i1, f1, d1);
 * \endcode
 * which sets the delimiter to comma. The default, readColSpaceDelim, uses a space.
 *
 * Everything from the comment character to the end of a line is discarded. Empty lines, and lines
 * containing only white space before the comment character, are skipped.
 *
 * Columns can be skipped using mx::ioutils::skipCol.
 *
 * \returns error_t::noerror on success
 * \returns an error code derived from errno, or error_t::fileoerr, if the file can not be opened
 * \returns error_t::exception if reading a line throws
 * \returns the error code returned by readcol if a column can not be processed
 * \returns an error code derived from errno, or error_t::filererr, if the stream reports a read error
 * \returns an error code derived from errno, or error_t::filecerr, if closing the file fails
 *
 * \tparam delimT is a type providing the static members delim, eol, and comment. See readColSpaceDelim.
 * \tparam verboseT is passed to internal::mxlib_error_report (presumably selects the verbosity of error reports)
 * \tparam arrTs a variadic list of array types. this is not specified by the user.
 *
 * \ingroup asciiutils
 */
template <class delimT = readColSpaceDelim, class verboseT = verbose::vvv, typename... arrTs>
error_t readColumns( const std::string &fname, ///< [in] is the file name to read from
                     arrTs &...arrays          /**< [out] a variadic list of std::vectors. Any number with mixed
                                                          value_type can be specified. Neither allocated nor cleared,
                                                          so repeated calls will append data.*/
)
{
    // open file
    errno = 0;
    std::ifstream fin;
    fin.open( fname );

    if( !fin.good() )
    {
        error_t errc;
        if( errno != 0 )
        {
            errc = errno2error_t( errno );
        }
        else
        {
            errc = error_t::fileoerr;
        }

        return internal::mxlib_error_report<verboseT>( errc, "Opening " + fname + " for reading" );
    }

    std::string line;

    int64_t lineno = 0; // 1-based number of the line currently being processed

    while( fin.good() )
    {
        ++lineno;
        try
        {
            std::getline( fin, line, delimT::eol );
        }
        catch( const std::exception &e )
        {
            return internal::mxlib_error_report<verboseT>(
                error_t::exception,
                std::format( "Reading from {} at line {}. {}.", fname, lineno, e.what() ) );
        }
        catch( ... )
        {
            return internal::mxlib_error_report<verboseT>(
                error_t::exception,
                std::format( "Reading from {} at line {}.", fname, lineno ) );
        }

        if( line.empty() )
        {
            continue;
        }

        // Find start of comment; only the text before it is data.
        const size_t commentPos = line.find( delimT::comment );
        const size_t dataEnd = ( commentPos == std::string::npos ) ? line.size() : commentPos;

        // Record if there is a non-space character before the comment.
        // The cast is needed because isspace is undefined for negative values.
        bool nonspace = false;
        for( size_t i = 0; i < dataEnd; ++i )
        {
            if( !isspace( static_cast<unsigned char>( line[i] ) ) )
            {
                nonspace = true;
                break;
            }
        }

        // Skip lines that are all comment and/or white space
        if( !nonspace )
        {
            continue;
        }

        line.resize( dataEnd ); // drops the comment; shrinking, so this does not throw

        int colno = 0;
        error_t errc = readcol<delimT, verboseT>( line.c_str(), static_cast<int>( line.size() ), colno, arrays... );

        if( errc != error_t::noerror )
        {
            // readcol leaves colno at the 0-based index of the failing column
            return internal::mxlib_error_report<verboseT>(
                errc,
                std::format( "Reading from {} at line {} column {}", fname, lineno, colno + 1 ) );
        }
    }

    // getline will have set fail if there was no new line on the last line, so only badbit
    // indicates a real read error. (fail() is also true whenever badbit is set.)
    if( fin.bad() )
    {
        error_t errc;
        if( errno != 0 )
        {
            errc = errno2error_t( errno );
        }
        else
        {
            errc = error_t::filererr;
        }

        return internal::mxlib_error_report<verboseT>( errc, "Reading from " + fname );
    }

    fin.clear(); // Clear the fail bit which may have been set by getline
    errno = 0;
    fin.close();

    if( fin.fail() )
    {
        error_t errc;
        if( errno != 0 )
        {
            errc = errno2error_t( errno );
        }
        else
        {
            errc = error_t::filecerr;
        }

        return internal::mxlib_error_report<verboseT>( errc, "Closing " + fname );
    }

    return error_t::noerror;
}
304
/// A dummy class to allow mx::ioutils::readColumns to skip a column(s) in a file without requiring memory allocation.
/** The alternative is to use dummy vectors, which result in excess memory allocations and deallocations.
  * Usage:
    \code
    std::vector<T> col1, col5;
    skipCol sk;
    readColumns("filename.txt", col1, sk, sk, sk, col5); //This results in only columns 1 and 5 being stored.
    \endcode
  *
  * \ingroup asciiutils
  */
struct skipCol
{
    using value_type = std::string; ///< value_type is defined as std::string so that no conversions take place.

    /// Accepts a value of any type and discards it.
    template <typename T>
    void push_back( const T & /* arg */ )
    {
    }
};
326
327} // namespace ioutils
328} // namespace mx
329
330#endif //__readColumns_hpp__
error_t readColumns(const std::string &fname, arrTs &...arrays)
Read in columns from a text file.
error_t
The mxlib error codes.
Definition error_t.hpp:20
static constexpr error_t errno2error_t(const int &err)
Convert an errno code to error_t.
Definition error_t.hpp:614
@ noerror
No error has occurred.
@ filererr
An error occurred while reading from a file.
@ exception
An exception was thrown.
@ filecerr
An error occurred while closing a file.
@ fileoerr
An error occurred while opening a file.
@ std_out_of_range
An out of range exception was thrown.
@ std_invalid_argument
An invalid argument exception was thrown.
The mxlib c++ namespace.
Definition mxError.hpp:40
Utilities for working with strings.
A dummy class to allow mx::readColumns to skip a column(s) in a file without requiring memory allocat...
std::string value_type
value_type is defined as std::string so that no conversions take place.