mxlib
c++ tools for analyzing astronomical data and other tasks by Jared R. Males. [git repo]
Loading...
Searching...
No Matches
readColumns.hpp
Go to the documentation of this file.
1/** \file readColumns.hpp
2 * \author Jared R. Males
3 * \brief A utility to read in columns from a text file.
4 * \ingroup asciiutils
5 */
6
7//***********************************************************************//
8// Copyright 2015, 2016, 2017 Jared R. Males (jaredmales@gmail.com)
9//
10// This file is part of mxlib.
11//
12// mxlib is free software: you can redistribute it and/or modify
13// it under the terms of the GNU General Public License as published by
14// the Free Software Foundation, either version 3 of the License, or
15// (at your option) any later version.
16//
17// mxlib is distributed in the hope that it will be useful,
18// but WITHOUT ANY WARRANTY; without even the implied warranty of
19// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20// GNU General Public License for more details.
21//
22// You should have received a copy of the GNU General Public License
23// along with mxlib. If not, see <http://www.gnu.org/licenses/>.
24//***********************************************************************//
25
26#ifndef __readColumns_hpp__
27#define __readColumns_hpp__
28
29#include <fstream>
30#include <string>
31#include <cstring>
32#include <iostream>
33
34#include "../mxlib.hpp"
35#include "../mxError.hpp"
36
37#include "stringUtils.hpp"
38
39#define MX_READCOL_MISSINGVALSTR "-99"
40
41namespace mx
42{
43namespace ioutils
44{
45
/// Terminal overload of readcol, selected when no output arrays remain.
/** This ends the recursion of the variadic readcol overload. It stores nothing and
  * leaves the text in \p sin untouched.
  *
  * \tparam delim is the character separating columns (not used by this overload)
  * \tparam eol is the end of line character (not used by this overload)
  */
template <char delim = ' ', char eol = '\n'>
void readcol( char *sin, int sz )
{
    // Reference both arguments solely to keep unused-parameter warnings quiet.
    static_cast<void>( sz );
    static_cast<void>( sin );
}
54
55template <char delim = ' ', char eol = '\n', typename arrT, typename... arrTs>
56void readcol( char *sin, int sz, arrT &array, arrTs &...arrays )
57{
58 // static const unsigned short int nargs = sizeof...(arrTs);
59 std::string str;
60
61 int i = 0;
62 int l = strlen( sin );
63
64 if( l < 1 )
65 return;
66
67 // Eat white space
68 while( isspace( sin[i] ) && sin[i] != eol && i < l )
69 ++i;
70 sin = sin + i;
71 sz = sz - i;
72
73 // If there's nothing here, we still need to populate the vector
74 if( sz <= 1 )
75 {
76 array.push_back( convertFromString<typename arrT::value_type>( "" ) );
77 return;
78 }
79
80 std::stringstream sinstr( sin );
81
82 std::getline( sinstr, str, delim );
83
84 // Last entry in line might contain eol
85 if( str[str.size() - 1] == eol )
86 {
87 str.erase( str.size() - 1 );
88 }
89
90 if( str.size() == 0 )
91 {
92 array.push_back( convertFromString<typename arrT::value_type>( MX_READCOL_MISSINGVALSTR ) );
93 }
94 else
95 {
96 array.push_back( convertFromString<typename arrT::value_type>( str ) );
97 }
98
99 sin += ( str.size() + 1 ) * sizeof( char );
100 sz -= ( str.size() + 1 ) * sizeof( char );
101
102 readcol<delim, eol>( sin, sz, arrays... );
103}
104
105/// Read in columns from a text file
106/** This function opens a file containing data formatted in columns and reads in the data row by row.
107 * The data are stored in std::vectors, which should not be pre-allocated (though they could be reserve()-ed).
108 *
109 * Example:
110 * \code
111 * std::vector<int> i1;
112 * std::vector<float> f1;
113 * std::vector<double> d1;
114 *
115 * readColumns("data_file.txt", i1, f1, d1);
116 * \endcode
117 *
118 * Note that the types of the vectors do not need to be specified as template arguments.
119 *
120 * The format of the file can be specified with template arguments like
121 * \code
122 * readColumns<',', ';', '\r'>("data_file.csv", i1, f1, d1);
123 * \endcode
124 * which sets the delimmiter to comma, the comment character to ;, and the end-of-line to \\r.
125 *
126 * Columns can be skipped using mx::ioutils::skipCol.
127 *
128 * \tparam delim is the character separating columns, by default this is space.
129 * \tparam comment is the character starting a comment. by default this is #
130 * \tparam eol is the end of line character. by default this is \n
131 * \tparam arrTs a variadic list of array types. this is not specified by the user.
132 *
133 * \todo lineSize should be configurable
134 *
135 * \ingroup asciiutils
136 */
137template <char delim = ' ', char comment = '#', char eol = '\n', typename... arrTs>
138int readColumns( const std::string &fname, ///< [in] is the file name to read from
139 arrTs &...arrays ///< [out] a variadic list of std::vectors. Any number with mixed value_type can be
140 ///< specified. Neither allocated nor cleared, so repeated calls will append data.
141)
142{
143 // open file
144 errno = 0;
145 std::ifstream fin;
146 fin.open( fname );
147
148 if( !fin.good() )
149 {
150 if( errno != 0 )
151 {
152 mxPError( "readColumns", errno, "Occurred while opening " + fname + " for reading." );
153 }
154 else
155 {
156 mxError( "readColumns", MXE_FILEOERR, "Occurred while opening " + fname + " for reading." );
157 }
158 return -1;
159 }
160
161 int lineSize = 4096;
162 char *line = new char[lineSize];
163
164 while( fin.good() )
165 {
166 // Save one space for adding eol
167 fin.getline( line, lineSize - 1, eol );
168
169 int i = 0;
170 int l = strlen( line );
171
172 if( l <= 0 )
173 break;
174
175 // std::cerr << line << "\n";
176
177 // Find start of comment and end line at that point.
178 while( line[i] != comment )
179 {
180 ++i;
181 if( i == l )
182 break;
183 }
184
185 if( i <= l - 1 )
186 {
187 line[i] = '\0';
188 }
189
190 l = strlen( line );
191
192 if( l == 0 )
193 continue;
194
195 // Make sure line ends with eol
196 line[l] = eol;
197 ++l;
198 line[l] = '\0';
199
200 readcol<delim, eol>( line, strlen( line ), arrays... );
201 }
202
203 delete[] line;
204
205 // getline will have set fail if there was no new line on the last line.
206 if( fin.bad() && !fin.fail() )
207 {
208 if( errno != 0 )
209 {
210 mxPError( "readColumns", errno, "Occurred while reading from " + fname + "." );
211 }
212 else
213 {
214 mxError( "readColumns", MXE_FILERERR, "Occurred while reading from " + fname + "." );
215 }
216 return -1;
217 }
218
219 fin.clear(); // Clear the fail bit which may have been set by getline
220 fin.close();
221
222 if( fin.fail() )
223 {
224 if( errno != 0 )
225 {
226 mxPError( "readColumns", errno, "Occurred while closing " + fname + "." );
227 }
228 else
229 {
230 mxError( "readColumns", MXE_FILECERR, "Occurred while closing " + fname + "." );
231 }
232 return -1;
233 }
234
235 return 0;
236}
237
/// A dummy class to allow mx::ioutils::readColumns to skip a column(s) in a file without requiring memory allocation.
/** The alternative is to use dummy vectors, which result in excess memory allocations and deallocations.
  * Usage:
  \code
  std::vector<T> col1, col5;
  skipCol sk;
  readColumns("filename.txt", col1, sk, sk, sk, col5); //This results in only columns 1 and 5 being stored.
  \endcode
  *
  * \ingroup asciiutils
  */
struct skipCol
{
    typedef std::string value_type; ///< value_type is defined as std::string so that no conversions take place.

    /// Accept a value and discard it, so that nothing is stored for the skipped column.
    /**
      * \tparam T is the type of the discarded value
      */
    template <typename T>
    void push_back( const T &arg /**< [in] the value to discard */ )
    {
        // Referenced only to keep unused-parameter warnings quiet.
        static_cast<void>( arg );
    }
};
259
260} // namespace ioutils
261} // namespace mx
262
263#endif //__readColumns_hpp__
int readColumns(const std::string &fname, arrTs &...arrays)
Read in columns from a text file.
The mxlib c++ namespace.
Definition mxError.hpp:106
Utilities for working with strings.
A dummy class to allow mx::readColumns to skip a column(s) in a file without requiring memory allocat...
std::string value_type
value_type is defined as std::string so that no conversions take place.