Qore CsvUtil Module Reference  1.7
AbstractCsvIterator.qc.dox.h
1 // -*- mode: c++; indent-tabs-mode: nil -*-
2 // Qore AbstractCsvIterator class definition
3 
4 /* AbstractCsvIterator.qc Copyright 2012 - 2020 Qore Technologies, s.r.o.
5 
6  Permission is hereby granted, free of charge, to any person obtaining a
7  copy of this software and associated documentation files (the "Software"),
8  to deal in the Software without restriction, including without limitation
9  the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  and/or sell copies of the Software, and to permit persons to whom the
11  Software is furnished to do so, subject to the following conditions:
12 
13  The above copyright notice and this permission notice shall be included in
14  all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  DEALINGS IN THE SOFTWARE.
23 */
24 
25 // assume local var scope, do not use "$" for vars, members, and method calls
26 
28 namespace CsvUtil {
30 
273 class AbstractCsvIterator : public Qore::AbstractIterator, protected CsvHelper {
274 
275 public:
276 protected:
278  const Options = ...;
279 
280 
281  // field separator
282  string separator = ",";
283 
284  // field content delimiter
285  string quote = "\"";
286 
287  // number of header lines
288  softint headerLines = 0;
289 
290  // flag to use string names from the first header row if possible
291  bool headerNames = False;
292 
293  // True if empty lines should be ignored
294  bool ignoreEmptyLines = True;
295 
296  // Flag to trim the field content (trim leading and trailing whitespace) from unquoted fields
297  bool ignoreWhitespace = True;
298 
299  // the @ref Qore::TimeZone to use when parsing dates (default: current time zone)
300  *TimeZone timezone;
301 
302  // verify the column count for every row; if a row does not match, then throw a \c CSVFILEITERATOR-DATA-ERROR exception
303  bool checkElementCounts = False;
304 
305  // getRecord/getValue returns extended hash
306  bool extendedRecord = False;
307 
308  // force "*string" fields with no value to return an empty string rather than @ref nothing for backwards compatibility with very early versions of CsvUtil
309  bool compat_force_empty_string = False;
310 
311  // read ahead flag
312  bool read_ahead;
313 
314  // column count for verifying column counts
315  int cc;
316 
317  // current record count for the index() method
318  int rc = 0;
319 
320  // to resolve record type by rules
321  hash m_resolve_by_rule;
322 
323  // to resolve record type by number of fields
324  hash m_resolve_by_count;
325 
326  // list of idx to field transformations, in order of spec
327  hash m_resolve_by_idx;
328 
329  // fake specs based on the first non-header row
330  bool fakeHeaderNames;
331 
332  // data source iterator
333  AbstractLineIterator lineIterator;
334 
335 public:
336 
338 
344  constructor(AbstractLineIterator li, *hash opts);
345 
346 
348 
353  // NOTE: when declared as *hash then always calls this constructor
354  constructor(AbstractLineIterator li, hash spec, hash opts);
355 
356 
358 protected:
359  processCommonOptions(*hash opts, int C_OPTx);
360 public:
361 
362 
364 protected:
366 public:
367 
368 
370 protected:
372 public:
373 
374 
375  bool valid();
376 
377 
379 
384  bool next();
385 
386 
388 
390  peek();
391 
392 
394 
401  auto memberGate(string name);
402 
403 
405 
416  hash<auto> getValue();
417 
418 
420 
433  hash<auto> getRecord(bool extended);
434 
435 
437 
448  hash<auto> getRecord();
449 
450 
452 
465 
466 
468 
475  string getSeparator();
476 
477 
479 
486  string getQuote();
487 
488 
490  *hash<string, AbstractDataField> getRecordType();
491 
492 
494 
501  *list<string> getHeaders();
502 
503 
505 
510  *list<string> getHeaders(string type);
511 
512 
514 
525  int index();
526 
527 
529 
542  int lineNumber();
543 
544 
546 
555  string getRawLine();
556 
557 
559 
569  list<*string> getRawLineValues();
570 
571 
572 protected:
573  auto handleType(hash<auto> fh, *string val);
574 public:
575 
576 
578 protected:
579  list<*string> getLineAndSplit();
580 public:
581 
582 
584 
591  string identifyType(list<auto> rec);
592 
593 
595 
602 protected:
603  *string identifyTypeImpl(list<auto> rec);
604 public:
605 
606 
608 protected:
609  hash<auto> parseLine();
610 public:
611 
612  }; // AbstractCsvIterator class
613 }; // CsvUtil namespace
CsvUtil::AbstractCsvIterator::processSpec
processSpec(hash spec)
process specification and assign internal data for resolving
CsvUtil::AbstractCsvIterator::getRecordList
auto getRecordList()
Returns the current record as a list.
CsvUtil::AbstractCsvIterator::lineNumber
int lineNumber()
Returns the current iterator line number in the file (the first line is line 1) or 0 if not pointing ...
CsvUtil::AbstractCsvIterator::next
bool next()
Moves the current line / record position to the next line / record; returns False if there are no mor...
CsvUtil::AbstractCsvIterator::Options
const Options
valid options for the object (a hash for quick lookups of valid keys)
Definition: AbstractCsvIterator.qc.dox.h:278
type
string type(auto arg)
CsvUtil::AbstractCsvIterator::getQuote
string getQuote()
Returns the current quote string.
CsvUtil::AbstractCsvIterator
the AbstractCsvIterator class is an abstract base class that allows abstract CSV data to be iterated
Definition: AbstractCsvIterator.qc.dox.h:273
CsvUtil::AbstractCsvIterator::index
int index()
Returns the row index being iterated, which does not necessarily correspond to the line number when t...
CsvUtil::AbstractCsvIterator::getHeaders
*list< string > getHeaders()
Returns the current record headers or NOTHING if no headers have been detected or saved yet.
CsvUtil::AbstractCsvIterator::getRawLineValues
list< *string > getRawLineValues()
Returns the list of raw string values of the current line.
CsvUtil::AbstractCsvIterator::getRecord
hash< auto > getRecord()
Returns the current record as a hash.
True
const True
CsvUtil
the CsvUtil namespace. All classes used in the CsvUtil module should be inside this namespace
Definition: AbstractCsvIterator.qc.dox.h:28
CsvUtil::AbstractCsvIterator::identifyTypeImpl
*string identifyTypeImpl(list< auto > rec)
Identify an input record, given the raw line string. This method performs a lookup to a precalculated ...
CsvUtil::AbstractCsvIterator::getValue
hash< auto > getValue()
Returns the current record as a hash.
CsvUtil::AbstractCsvIterator::getHeaders
*list< string > getHeaders(string type)
Returns a list of headers for the given record or NOTHING if the record is not recognized.
CsvUtil::AbstractCsvIterator::getSeparator
string getSeparator()
Returns the current separator string.
CsvUtil::AbstractCsvIterator::identifyType
string identifyType(list< auto > rec)
Identify a fixed-length line type using identifyTypeImpl(); may be overridden if necessary.
list
list< auto > list(...)
hash
hash< auto > hash(object obj)
CsvUtil::AbstractCsvIterator::getRecord
hash< auto > getRecord(bool extended)
Returns the current record as a hash.
CsvUtil::AbstractCsvIterator::getRawLine
string getRawLine()
Returns the current line 'as it is', i.e. the original string.
CsvUtil::AbstractCsvIterator::getRecordType
*hash< string, AbstractDataField > getRecordType()
Returns the description of the record type, if any.
False
const False
CsvUtil::AbstractCsvIterator::prepareFieldsFromHeaders
prepareFieldsFromHeaders(*list headers)
match headers provided at csv header or in options, never called for multi-type because header_names ...
CsvUtil::AbstractCsvIterator::peek
peek()
Reads a single row without moving the index position.
CsvUtil::AbstractCsvIterator::constructor
constructor(AbstractLineIterator li, hash spec, hash opts)
creates the AbstractCsvIterator with an option hash in multi-type mode
CsvUtil::AbstractCsvIterator::constructor
constructor(AbstractLineIterator li, *hash opts)
creates the AbstractCsvIterator with an option hash in single-type mode
CsvUtil::AbstractCsvIterator::parseLine
hash< auto > parseLine()
Parses a line in the file and returns a processed list of the fields.
Qore::AbstractIterator
CsvUtil::AbstractCsvIterator::memberGate
auto memberGate(string name)
Returns the given column value for the current row.
CsvUtil::AbstractCsvIterator::getLineAndSplit
list< *string > getLineAndSplit()
Read line split by separator/quote into list.
CsvUtil::AbstractCsvIterator::processCommonOptions
processCommonOptions(*hash opts, int C_OPTx)
process common options and assign internal fields