mxlib
c++ tools for analyzing astronomical data and other tasks by Jared R. Males. [git repo]
Loading...
Searching...
No Matches
readColumns.hpp
Go to the documentation of this file.
1/** \file readColumns.hpp
2 * \author Jared R. Males
3 * \brief A utility to read in columns from a text file.
4 * \ingroup asciiutils
5 */
6
7//***********************************************************************//
8// Copyright 2015, 2016, 2017 Jared R. Males (jaredmales@gmail.com)
9//
10// This file is part of mxlib.
11//
12// mxlib is free software: you can redistribute it and/or modify
13// it under the terms of the GNU General Public License as published by
14// the Free Software Foundation, either version 3 of the License, or
15// (at your option) any later version.
16//
17// mxlib is distributed in the hope that it will be useful,
18// but WITHOUT ANY WARRANTY; without even the implied warranty of
19// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20// GNU General Public License for more details.
21//
22// You should have received a copy of the GNU General Public License
23// along with mxlib. If not, see <http://www.gnu.org/licenses/>.
24//***********************************************************************//
25
26#ifndef __readColumns_hpp__
27#define __readColumns_hpp__
28
29#include <cstring>
30#include <string>
31#include <fstream>
32#include <format>
33
34#include "../mxlib.hpp"
35
36#include "stringUtils.hpp"
37
38#define MX_READCOL_MISSINGVALSTR "-99"
39
40namespace mx
41{
42namespace ioutils
43{
44
45struct readColSpaceDelim
46{
47 static constexpr char delim = ' ';
48 static constexpr char strDelim = '"';
49 static constexpr char eol = '\n';
50 static constexpr char comment = '#';
51 static constexpr const char *missingValStr = MX_READCOL_MISSINGVALSTR;
52};
53
54struct readColCommaDelim
55{
56 static constexpr char delim = ',';
57 static constexpr char strdelim = '"';
58 static constexpr char eol = '\n';
59 static constexpr char comment = '#';
60 static constexpr const char *missingValStr = MX_READCOL_MISSINGVALSTR;
61};
62
63template <class delimT, class verboseT>
64error_t readcol( [[maybe_unused]] const char *sin, [[maybe_unused]] int sz, [[maybe_unused]] int &colno )
65{
66 return error_t::noerror;
67}
68
/// Parse one delimited field from the front of a line, append it to \p array, then recurse.
/** Leading white space (other than delimT::eol) is skipped. The text up to the next delimT::delim, or up to
 * the end of the string if there is none, is converted with stoT to `arrT::value_type` and pushed onto
 * \p array. The remainder of the line is then handed to readcol for the remaining \p arrays.
 *
 * Special cases:
 * - An empty field is stored as the conversion of MX_READCOL_MISSINGVALSTR.
 * - If only white space remains, the conversion of an empty string is stored and processing of the line stops.
 * - If the line is already exhausted on entry, nothing is stored in \p array or in any of the remaining
 *   \p arrays.
 *
 * std::invalid_argument and std::out_of_range are always converted to error codes. Other exceptions are
 * converted only when MXLIB_CATCH_ALL_EXCEPTIONS (or, for non-allocation exceptions,
 * MXLIB_CATCH_NONALLOC_EXCEPTIONS) is defined; otherwise they are rethrown nested inside an mx::exception.
 *
 * \tparam delimT the delimiter policy; must provide static members `delim` and `eol`
 * \tparam verboseT the error reporting verbosity
 * \tparam arrT type of the container receiving this field; must provide `value_type` and `push_back`
 * \tparam arrTs types of the containers receiving the subsequent fields
 *
 * \param sin    [in] NUL-terminated text of the unparsed part of the line
 * \param sz     [in] number of characters remaining in \p sin
 * \param colno  [in/out] 0-based index of the column being parsed; on error it identifies the failing column
 * \param array  [out] container receiving the parsed value
 * \param arrays [out] containers receiving the following columns
 *
 * \returns error_t::noerror on success
 * \returns the error code reported by stoT, or the code corresponding to a caught exception, on failure
 */
template <class delimT, class verboseT, typename arrT, typename... arrTs>
error_t readcol( const char *sin, int sz, int &colno, arrT &array, arrTs &...arrays )
{
    try
    {
        const int l = static_cast<int>( std::strlen( sin ) );

        if( l < 1 )
        {
            return error_t::noerror;
        }

        // Eat white space. The bound is tested first, and the character is passed as unsigned char because
        // isspace is undefined for negative values.
        int i = 0;
        while( i < l && sin[i] != delimT::eol && isspace( static_cast<unsigned char>( sin[i] ) ) )
        {
            ++i;
        }
        sin += i;
        sz -= i;

        // If there's nothing here, we still need to populate the vector
        if( sz <= 0 )
        {
            mx::error_t errc = mx::error_t::noerror;
            array.push_back( stoT<typename arrT::value_type>( "", &errc ) );

            if( errc != mx::error_t::noerror )
            {
                return internal::mxlib_error_report<verboseT>( errc, std::format( "processing column {}", colno ) );
            }

            // The line is exhausted, so stop here rather than fall through and store a second value.
            return error_t::noerror;
        }

        // The field runs up to the next delimiter, or to the end of the string when there is none.
        const char *delimPos = std::strchr( sin, delimT::delim );
        const size_t rawLen = ( delimPos != nullptr ) ? static_cast<size_t>( delimPos - sin ) : std::strlen( sin );

        std::string str( sin, rawLen );

        // Last entry in line might contain eol. An empty field must not be indexed.
        if( !str.empty() && str.back() == delimT::eol )
        {
            str.pop_back();
        }

        mx::error_t errc = mx::error_t::noerror;
        if( str.empty() )
        {
            array.push_back( stoT<typename arrT::value_type>( MX_READCOL_MISSINGVALSTR, &errc ) );
        }
        else
        {
            array.push_back( stoT<typename arrT::value_type>( str, &errc ) );
        }

        if( errc != mx::error_t::noerror )
        {
            return internal::mxlib_error_report<verboseT>( errc, std::format( "processing column {}", colno ) );
        }

        // Step over the field and, only if one was actually present, its delimiter. Stepping over a
        // delimiter that is not there would move sin past the terminating NUL.
        const int consumed = static_cast<int>( rawLen ) + ( ( delimPos != nullptr ) ? 1 : 0 );
        sin += consumed;
        sz -= consumed;
    }
    catch( const std::invalid_argument &e )
    {
        // We always catch this one
        return internal::mxlib_error_report<verboseT>( error_t::std_invalid_argument,
                                                       std::format( "processing column {}: {}", colno, e.what() ) );
    }
    catch( const std::out_of_range &e )
    {
        // We always catch this one
        return internal::mxlib_error_report<verboseT>( error_t::std_out_of_range,
                                                       std::format( "processing column {}: {}", colno, e.what() ) );
    }
    catch( const std::bad_alloc &e )
    {

        // clang-format off
        #if defined( MXLIB_CATCH_ALL_EXCEPTIONS )
            return internal::mxlib_error_report<verboseT>( error_t::std_bad_alloc,
                                           std::format( "processing column {}: {}", colno, e.what() ) );
        #else
            std::throw_with_nested(mx::exception<verboseT>(error_t::std_bad_alloc,
                                           std::format( "processing column {}: {}", colno, e.what() ) ));
        #endif
        // clang-format on
    }
    catch( const std::exception &e )
    {
        // clang-format off
        #if defined( MXLIB_CATCH_ALL_EXCEPTIONS ) || defined( MXLIB_CATCH_NONALLOC_EXCEPTIONS )
            return internal::mxlib_error_report<verboseT>( error_t::std_exception,
                                           std::format( "processing column {}: {}", colno, e.what() ) );

        #else
            std::throw_with_nested(mx::exception<verboseT>(error_t::std_exception,
                                           std::format( "processing column {}: {}", colno, e.what() ) ));

        #endif
        // clang-format on
    }
    catch( ... )
    {
        // clang-format off
        #if defined( MXLIB_CATCH_ALL_EXCEPTIONS ) || defined( MXLIB_CATCH_NONALLOC_EXCEPTIONS )
            return internal::mxlib_error_report<verboseT>( error_t::exception,
                                           std::format( "processing column {}", colno) );

        #else
            std::throw_with_nested(mx::exception<verboseT>(error_t::exception,
                                           std::format( "processing column {}", colno) ));

        #endif
        // clang-format on
    }

    ++colno;
    return readcol<delimT, verboseT>( sin, sz, colno, arrays... );
}
190
/// Read in columns from a text file
/** This function opens a file containing data formatted in columns and reads in the data row by row.
 * The data are stored in std::vectors, which should not be pre-allocated (though they could be reserve()-ed).
 *
 * Example:
 * \code
 * std::vector<int> i1;
 * std::vector<float> f1;
 * std::vector<double> d1;
 *
 * readColumns("data_file.txt", i1, f1, d1);
 * \endcode
 *
 * Note that the types of the vectors do not need to be specified as template arguments.
 *
 * The format of the file is selected with the delimiter policy template argument, e.g.
 * \code
 * readColumns<readColCommaDelim>("data_file.csv", i1, f1, d1);
 * \endcode
 * which sets the delimiter to comma. A custom policy is any struct providing the static members
 * `delim`, `eol` and `comment` used by this function and by readcol.
 *
 * Empty rows, rows containing only white space, and rows containing only a comment are skipped.
 * On any other row, everything from the comment character onward is discarded before parsing.
 *
 * Columns can be skipped using mx::ioutils::skipCol.
 *
 * \tparam delimT specifies the delimiters. By default this is \ref readColSpaceDelim
 * \tparam verboseT specifies the error reporting verbosity. See \ref mx::verbose
 * \tparam arrTs the types of the output containers
 *
 * \returns error_t::noerror on success
 * \returns an error code derived from errno, or error_t::fileoerr, error_t::filererr or error_t::filecerr,
 *          if opening, reading or closing the file fails
 * \returns the error code returned by readcol if a row cannot be parsed
 *
 * \ingroup asciiutils
 */
template <class delimT = readColSpaceDelim, class verboseT = verbose::d, typename... arrTs>
error_t readColumns( const std::string &fname, ///< [in] is the file name to read from
                     arrTs &...arrays          /**< [out] a variadic list of std::vectors. Any number with mixed
                                                          value_type can be specified. Neither allocated nor cleared,
                                                          so repeated calls will append data.*/
)
{
    // open file
    errno = 0;
    std::ifstream fin;
    fin.open( fname );

    if( !fin.good() )
    {
        const error_t errc = ( errno != 0 ) ? errno2error_t( errno ) : error_t::fileoerr;

        return internal::mxlib_error_report<verboseT>( errc, "Opening " + fname + " for reading" );
    }

    std::string line;

    int64_t lineno = 0; // 1-based number of the row currently being read, for error messages

    while( fin.good() )
    {
        ++lineno;
        try
        {
            std::getline( fin, line, delimT::eol );
        }
        catch( const std::bad_alloc &e )
        {

            // clang-format off
            #if defined( MXLIB_CATCH_ALL_EXCEPTIONS )
                return internal::mxlib_error_report<verboseT>( error_t::std_bad_alloc,
                                                               std::format( "Reading from {} at line {}: {}.",
                                                                            fname, lineno, e.what() ) );
            #else
                std::throw_with_nested( mx::exception<verboseT>( error_t::std_bad_alloc,
                                                                 std::format( "Reading from {} at line {}: {}.",
                                                                              fname, lineno, e.what() ) ) );
            #endif
            // clang-format on
        }
        catch( const std::exception &e )
        {

            // clang-format off
            #if defined( MXLIB_CATCH_ALL_EXCEPTIONS ) || defined(MXLIB_CATCH_NONALLOC_EXCEPTIONS)

                return internal::mxlib_error_report<verboseT>( error_t::std_exception,
                                                               std::format( "Reading from {} at line {}: {}.",
                                                                            fname,
                                                                            lineno,
                                                                            e.what() ) );
            #else
                std::throw_with_nested(mx::exception<verboseT>( error_t::std_exception,
                                                                std::format( "Reading from {} at line {}: {}.",
                                                                             fname, lineno, e.what() ) ) );
            #endif
            // clang-format on
        }
        catch( ... )
        {

            // clang-format off
            #if defined( MXLIB_CATCH_ALL_EXCEPTIONS ) || defined(MXLIB_CATCH_NONALLOC_EXCEPTIONS)

                return internal::mxlib_error_report<verboseT>( error_t::exception,
                                                               std::format( "Reading from {} at line {}",
                                                                            fname,
                                                                            lineno ) );
            #else
                std::throw_with_nested(mx::exception<verboseT>( error_t::exception,
                                                                std::format( "Reading from {} at line {}",
                                                                             fname, lineno) ) );
            #endif
            // clang-format on
        }

        if( line.empty() )
        {
            continue;
        }

        // Find start of comment and end line at that point.
        size_t i = 0;
        bool nonspace = false; // record if we find a non-space character before the comment
        while( i < line.size() && line[i] != delimT::comment )
        {
            // isspace is undefined for negative values, so pass the character as unsigned char
            if( !nonspace && !isspace( static_cast<unsigned char>( line[i] ) ) )
            {
                nonspace = true;
            }
            ++i;
        }

        // Check if line is all comment (or only white space before the comment)
        if( i == 0 || !nonspace )
        {
            continue;
        }

        if( i < line.size() ) // i is > 0 if we're here
        {
            line.erase( line.begin() + i, line.end() ); // does not throw
        }

        int colno = 0;
        const error_t errc =
            readcol<delimT, verboseT>( line.c_str(), static_cast<int>( line.size() ), colno, arrays... );

        if( errc != error_t::noerror )
        {
            // readcol leaves colno at the 0-based index of the failing column
            return internal::mxlib_error_report<verboseT>(
                errc,
                std::format( "Reading from {} at line {} column {}", fname, lineno, colno + 1 ) );
        }
    }

    // Only badbit signals a genuine read error. failbit alone is expected here: getline sets it when it
    // reaches end-of-file without extracting any characters. Note that fail() is also true whenever
    // badbit is set, so it cannot be used to tell the two cases apart.
    if( fin.bad() )
    {
        const error_t errc = ( errno != 0 ) ? errno2error_t( errno ) : error_t::filererr;

        return internal::mxlib_error_report<verboseT>( errc, "Reading from " + fname );
    }

    fin.clear(); // Clear the fail bit which may have been set by getline
    errno = 0;
    fin.close();

    if( fin.fail() )
    {
        const error_t errc = ( errno != 0 ) ? errno2error_t( errno ) : error_t::filecerr;

        return internal::mxlib_error_report<verboseT>( errc, "Closing " + fname );
    }

    return error_t::noerror;
}
386
/// A dummy class to allow mx::ioutils::readColumns to skip a column(s) in a file without requiring memory allocation.
/** The alternative is to use dummy vectors, which result in excess memory allocations and deallocations.
  * Usage:
  \code
  std::vector<T> col1, col5;
  skipCol sk;
  readColumns("filename.txt", col1, sk, sk, sk, col5); //This results in only columns 1 and 5 being stored.
  \endcode
  *
  * \ingroup asciiutils
  */
struct skipCol
{
    typedef std::string value_type; ///< value_type is defined as std::string so that no conversions take place.

    /// Accept a value of any type and discard it, so that nothing is stored for the skipped column.
    template <typename T>
    void push_back( [[maybe_unused]] const T &arg )
    {
        return;
    }
};
408
409} // namespace ioutils
410} // namespace mx
411
412#endif //__readColumns_hpp__
Augments an exception with the source file and line.
Definition exception.hpp:42
error_t readColumns(const std::string &fname, arrTs &...arrays)
Read in columns from a text file.
error_t
The mxlib error codes.
Definition error_t.hpp:26
static constexpr error_t errno2error_t(const int &err)
Convert an errno code to error_t.
Definition error_t.hpp:2006
@ noerror
No error has occurred.
@ std_exception
An exception was thrown.
@ filererr
An error occurred while reading from a file.
@ exception
An exception was thrown.
@ std_bad_alloc
A bad allocation exception was thrown.
@ filecerr
An error occurred while closing a file.
@ fileoerr
An error occurred while opening a file.
@ std_out_of_range
An out of range exception was thrown.
@ std_invalid_argument
An invalid argument exception was thrown.
MXLIB_DEFAULT_VERBOSITY d
The default verbosity.
Definition error.hpp:202
The mxlib c++ namespace.
Definition mxlib.hpp:37
Utilities for working with strings.
A dummy class to allow mx::readColumns to skip a column(s) in a file without requiring memory allocat...
std::string value_type
value_type is defined as std::string so that no conversions take place.