Qore CsvUtil Module Reference  1.6
CsvUtil.qm.dox.h
1 // -*- mode: c++; indent-tabs-mode: nil -*-
2 // @file CsvUtil.qm Qore user module for working with CSV files
3 
4 /* CsvUtil.qm Copyright 2012 - 2017 Qore Technologies, s.r.o.
5 
6  Permission is hereby granted, free of charge, to any person obtaining a
7  copy of this software and associated documentation files (the "Software"),
8  to deal in the Software without restriction, including without limitation
9  the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  and/or sell copies of the Software, and to permit persons to whom the
11  Software is furnished to do so, subject to the following conditions:
12 
13  The above copyright notice and this permission notice shall be included in
14  all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  DEALINGS IN THE SOFTWARE.
23 */
24 
25 // minimum required Qore version
26 
27 
28 // assume local var scope, do not use "$" for vars, members, and method calls
29 
30 
31 /* see release notes below for version history
32 */
33 
// CsvHelper: helper class that both AbstractCsvIterator and AbstractCsvWriter
// (declared later in this file) inherit privately.
// NOTE(review): the leading number on each line looks like the original line
// number in CsvUtil.qm.dox.h, and the alternating "private:"/"public:" labels
// look generator-emitted -- confirm before hand-editing this listing.
295 class CsvHelper {
296 
297 public:
298  private :
     // Bit flags; the Options tables of the iterator and writer classes OR them
     // together per option name. The "int C_OPTx" parameters below presumably
     // take one of these values -- confirm.
299  const C_OPT1 = 0x1;
300  const C_OPT2 = 0x2;
     // Lookup hash of field type names (every value is True). Each name appears
     // in a plain and a "*"-prefixed form; presumably the "*" form is the
     // "or nothing" variant -- confirm against checkType().
302  const Types = (
303  "int": True,
304  "*int": True,
305  "float": True,
306  "*float": True,
307  "number": True,
308  "*number": True,
309  "string": True,
310  "*string": True,
311  "date": True,
312  "*date": True,
313  );
314 
     // List of field attribute names; presumably the keys accepted in a field
     // description -- confirm.
316  const FieldAttrs = ("type", "format", "timezone", "code", "header");
317 
     // Defaults to False; shares its name with the "tolwr" option key listed in
     // AbstractCsvIterator::Options.
319  bool tolwr = False;
320 
     // Shares its name with the "date_format"/"date-format" option keys.
322  string date_format;
323 
     // NOTE(review): presumably the processed record specifications -- confirm.
325  hash m_specs;
326 
     // Set from the constructor argument n_errname (presumably used to label
     // errors raised by derived classes -- confirm).
328  string errname;
329 
330  // reorder data according to the headers set by options.headers or read from CsvHeader
331  bool headerReorder = True;
332 
333 
334 public:
335 
     // Takes the error name (see errname above).
337  constructor (string n_errname);
338 
339 
341 
342 private:
343  bool isMultiType();
344 public:
345 
346 
348 
349 private:
     // No declared return type; takes an error-text string (fld_errs) and a
     // key/value pair. Presumably validates "value" against Types -- confirm.
350  checkType(string fld_errs, string key, string value);
351 public:
352 
353 
354  // get spec from options.fields for old Csv. Check spec param for new Csv
355 
356 private:
357  hash getSpec(*hash fields, string fld_errs, int C_OPTx);
358 public:
359 
360 
361 
362 private:
     // Takes an optional fields hash (compare getSpec2, which takes a required
     // spec hash).
363  hash getSpec1(*hash fields);
364 public:
365 
366 
367 
368 private:
369  hash getSpec2(hash spec);
370 public:
371 
372 
378 private:
     // "check" defaults to False; returns a list.
379  list adjustFieldsFromHeaders(string type, *list headers, bool check = False);
380 public:
381 
382 
383 }; // class CsvHelper
384 
386 namespace CsvUtil {
388  const EOL_UNIX = "\n"; // Unix end of line character sequence (for new OS X too)
390  const EOL_WIN = "\r\n"; // MS DOS/Windows end of line character sequence
392  const EOL_MACINTOSH = "\r"; // Old (pre-OSX) Macintosh end of line character sequence
393 
394  // helper list of all end of line values defined above
395  const EOLS = (EOL_UNIX, EOL_WIN, EOL_MACINTOSH, );
396 
398  const CSV_TYPE_UNKNOWN = "<unknown>"; // Record type when not matching any type
400  const CSV_TYPE_SINGLE = "<single>"; // Record type when multi-type is disabled
401 
403 
// AbstractCsvIterator: abstract base class that allows abstract CSV data to be
// iterated. It derives publicly from Qore::AbstractIterator and privately from
// CsvHelper, and reads its input through an AbstractLineIterator.
641 class AbstractCsvIterator : public Qore::AbstractIterator, private CsvHelper {
642 
643 public:
644  private :
     // Option name -> bitmask of CsvHelper::C_OPT1 / C_OPT2.
     // Several options are listed in both dash and underscore spellings.
     // "fields" and "headers" carry only C_OPT1; "extended_record" only C_OPT2.
646  const Options = (
647  "date_format": C_OPT1|C_OPT2,
648  "date-format": C_OPT1|C_OPT2,
649  "encoding": C_OPT1|C_OPT2,
650  "eol": C_OPT1|C_OPT2,
651  "extended_record": C_OPT2,
652  "fields": C_OPT1,
653  "header-lines": C_OPT1|C_OPT2,
654  "header_lines": C_OPT1|C_OPT2,
655  "header-names": C_OPT1|C_OPT2,
656  "header_names": C_OPT1|C_OPT2,
657  "header_reorder": C_OPT1|C_OPT2,
658  "headers": C_OPT1,
659  "ignore-empty": C_OPT1|C_OPT2,
660  "ignore_empty": C_OPT1|C_OPT2,
661  "ignore-whitespace": C_OPT1|C_OPT2,
662  "ignore_whitespace": C_OPT1|C_OPT2,
663  "quote": C_OPT1|C_OPT2,
664  "separator": C_OPT1|C_OPT2,
665  "timezone": C_OPT1|C_OPT2,
666  "tolwr": C_OPT1|C_OPT2,
667  "verify-columns": C_OPT1|C_OPT2,
668  "verify_columns": C_OPT1|C_OPT2,
669  );
670 
671  // field separator (default: comma)
672  string separator = ",";
673 
674  // field content delimiter (default: double quote)
675  string quote = "\"";
676 
677  // number of header lines (default: 0)
678  softint headerLines = 0;
679 
680  // flag to use string names from the first header row if possible
681  bool headerNames = False;
682 
683  // True if empty lines should be ignored
684  bool ignoreEmptyLines = True;
685 
686  // Flag to trim the field content (trim leading and trailing whitespace) from unquoted fields
687  bool ignoreWhitespace = True;
688 
689  // the @ref Qore::TimeZone to use when parsing dates (default: current time zone)
690  *TimeZone timezone;
691 
692  // verify the column count for every row; if a row does not match, then throw a \c CSVFILEITERATOR-DATA-ERROR exception
693  bool checkElementCounts = False;
694 
695  // if True, getRecord/getValue return the extended hash
696  bool extendedRecord = False;
697 
698  // column count for verifying column counts
699  int cc;
700 
701  // current record count for the index() method
702  int rc = 0;
703 
704  // to resolve record type by rules
705  hash m_resolve_by_rule;
706 
707  // to resolve record type by number of fields
708  hash m_resolve_by_count;
709 
710  // idx to field transformations, in order of spec
     // NOTE(review): previously described as a "list", but the member is declared as a hash
711  hash m_resolve_by_idx;
712 
713  // fake specs based on the first non-header row
714  bool fakeHeaderNames;
715 
716  // data source iterator
717  AbstractLineIterator lineIterator;
718 
719 public:
720 
722 
     // Constructor variant taking only an optional options hash.
728  constructor(AbstractLineIterator li, *hash opts);
729 
730 
732 
     // Constructor variant taking a record spec plus an options hash.
737  // NOTE: when declared as *hash then always calls this constructor
738  constructor(AbstractLineIterator li, hash spec, hash opts);
739 
740 
742 
743 private:
     // C_OPTx: presumably CsvHelper::C_OPT1 or C_OPT2, selecting which keys of
     // Options are accepted -- confirm.
744  processCommonOptions(*hash opts, int C_OPTx);
745 public:
746 
747 
749 
750 private:
751  processSpec(hash spec);
752 public:
753 
754 
756 
757 private:
758  prepareFieldsFromHeaders(*list headers);
759 public:
760 
761 
762  bool valid();
763 
764 
766 
771  bool next();
772 
773 
775 
782  auto memberGate(string name);
783 
784 
786 
797  hash getValue();
798 
799 
801 
     // Overload with an explicit "extended" flag (compare the extendedRecord member).
814  hash getRecord(bool extended);
815 
816 
818 
829  hash getRecord();
830 
831 
833 
845  auto getRecordList();
846 
847 
849 
856  string getSeparator();
857 
858 
860 
867  string getQuote();
868 
869 
871 
878  *list getHeaders();
879 
880 
882 
     // Overload taking a record type name.
887  *list getHeaders(string type);
888 
889 
891 
     // See the "rc" member: current record count.
902  int index();
903 
904 
906 
919  int lineNumber();
920 
921 
922 
923 private:
924  auto handleType(hash fh, *string val);
925 public:
926 
927 
929 
930 private:
931  list getLineAndSplit();
932 public:
933 
934 
936 
     // Public variant always returns a string; the private identifyTypeImpl()
     // below may return nothing (*string).
943  string identifyType(list rec);
944 
945 
947 
955 private:
956  *string identifyTypeImpl(list rec);
957 public:
958 
959 
961 
962 private:
963  hash parseLine();
964 public:
965 
966  };
967 
969 
975 
976 public:
978 
983  constructor(Qore::AbstractLineIterator li, *hash opts) ;
984 
985 
987 
993  constructor(Qore::AbstractLineIterator li, hash spec, hash opts) ;
994 
995 
997 
1003  constructor(Qore::InputStream input, string encoding = "UTF-8", *hash opts) ;
1004 
1005 
1007 
1014  constructor(Qore::InputStream input, string encoding = "UTF-8", hash spec, hash opts) ;
1015 
1016 
1017  auto memberGate(string name);
1018 
1019  };
1020 
1022 
1030 
1031 public:
1032  private :
1034  string m_file_path;
1035 
1036 public:
1037 
1039 
1044  constructor(string path, *hash opts) ;
1045 
1046 
1048 
1052  constructor(string path, hash spec, hash opts) ;
1053 
1054 
1056  auto memberGate(string name);
1057 
1058 
1060  string getEncoding();
1061 
1062 
1064  string getFileName();
1065 
1066 
1068  hash<Qore::StatInfo> hstat();
1069 
1070 
1072  list stat();
1073 
1074  }; // CsvFileIterator class
1075 
1077 
1085 
1086 public:
1087 
1089 
1094  constructor(string data, *hash opts) ;
1095 
1096 
1098 
1102  constructor(string data, hash spec, hash opts) ;
1103 
1104 
1105  auto memberGate(string name);
1106 
1107 
1108  };
1109 
1111 
// AbstractCsvWriter: provides a parent for all CSV writers. It derives
// privately from CsvHelper; concrete writers must supply writeRawLine(),
// which is declared abstract below.
1220 class AbstractCsvWriter : private CsvHelper {
1221 
1222 public:
1223  private :
      // Option name -> bitmask of CsvHelper::C_OPT1 / C_OPT2.
      // "datamap", "fields", "headers" and "header_reorder" carry only C_OPT1.
1225  const Options = (
1226  "block": C_OPT1|C_OPT2,
1227  "datamap": C_OPT1,
1228  "date_format": C_OPT1|C_OPT2,
1229  "date-format": C_OPT1|C_OPT2,
1230  "encoding": C_OPT1|C_OPT2,
1231  "eol": C_OPT1|C_OPT2,
1232  "fields": C_OPT1,
1233  "headers": C_OPT1,
1234  "header_reorder": C_OPT1,
1235  "info_log": C_OPT1|C_OPT2,
1236  "optimal_quotes": C_OPT1|C_OPT2,
1237  "optimal-quotes": C_OPT1|C_OPT2,
1238  "quote": C_OPT1|C_OPT2,
1239  "quote_escape": C_OPT1|C_OPT2,
1240  "separator": C_OPT1|C_OPT2,
1241  "verify_columns": C_OPT1|C_OPT2,
1242  "verify-columns": C_OPT1|C_OPT2,
1243  "write_headers": C_OPT1|C_OPT2,
1244  "write-headers": C_OPT1|C_OPT2,
1245  );
1246 
      // output file character encoding
1248  string encoding;
1249 
      // default: comma
1251  string separator = ",";
1252 
      // default: double quote
1254  string quote = "\"";
1255 
      // default: a single backslash (shares its purpose, presumably, with the
      // "quote_escape" option key -- confirm)
1257  string m_quoteEscapeChar = "\\";
1258 
      // default: EOL_UNIX
1260  string eol = EOL_UNIX;
1261 
1263  bool checkElementCounts = False;
1264 
1266  int lineNo = 0;
1267 
      // default: 1000; shares its name with the "block" option key
1269  int block = 1000;
1270 
      // (line 1272, elided from this listing: "string baseTemplate" -- base template for value format)
1273 
1275  bool write_headers = True;
1276 
1278  bool optimal_quotes = True;
1279 
      // a closure/call reference for informational logging when using write(SQLStatement)
1281  *code info_log;
1282 
      // (line 1284, elided from this listing: "hash m_out_by_name" -- mapping output field by name)
1285 
      // (line 1287, elided from this listing: "hash m_out_by_idx" -- mapping output field by index)
1288 
1289 public:
1290 
1292 
      // Constructor variant taking the error name and an optional options hash.
1298  constructor(string n_errname, *hash n_opts);
1299 
1300 
1302 
      // Constructor variant taking the error name, a record spec and an options hash.
1310  constructor(string n_errname, hash spec, hash n_opts);
1311 
1312 
1314 
1315 private:
1316  processCommonOptions(*hash n_opts, int C_OPTx);
1317 public:
1318 
1319 
1321 
1322 private:
1323  processSpec();
1324 public:
1325 
1326 
1328 
1329 private:
1330  writeHeaders();
1331 public:
1332 
1333 
1335 
      // writeLine() overloads: values as a list or a hash, with or without a
      // leading record type name.
1340  writeLine(list values);
1341 
1342 
1344 
1349  writeLine(hash values);
1350 
1351 
1353 
1359  writeLine(string type, list values);
1360 
1361 
1363 
1369  writeLine(string type, hash values);
1370 
1371 
1373 
      // write() overloads: input from an iterator, an SQL statement, or a list.
1380  write(Qore::AbstractIterator iterator);
1381 
1382 
1384 
1391  write(Qore::SQL::SQLStatement iterator);
1392 
1393 
1395 
1402  write(list l);
1403 
1404 
1406 
1407 private:
      // Abstract: each concrete writer declares its own writeRawLine(list values).
1408  abstract writeRawLine(list values);
1409 public:
1410 
1412 
1418 private:
1419  string prepareRawLine(list values);
1420 public:
1421 
1422 
1423 
1424 private:
1425  string prepareRawLineIntern(list values);
1426 public:
1427 
1428 
1429  }; // AbstractCsvWriter class
1430 
1433 
1434 public:
1435  private :
1437  StreamWriter output;
1438 
1439 public:
1440 
1442 
1448  constructor(Qore::OutputStream output, *hash opts) ;
1449 
1450 
1452 
1459  constructor(Qore::OutputStream output, hash spec, hash opts) ;
1460 
1461 
1463 
1464 private:
1465  writeRawLine(list values);
1466 public:
1467 
1468  };
1469 
1471 
1476 
1477 public:
1478  private :
1481 
1482 public:
1483 
1485 
1493  constructor(string path, *hash opts) ;
1494 
1495 
1497 
1506  constructor(string path, hash spec, hash opts) ;
1507 
1508 
1509 
1510 private:
1511  openFile(string path);
1512 public:
1513 
1514 
1515 
1516 private:
1517  writeRawLine(list values);
1518 public:
1519 
1520  }; // CsvFileWriter
1521 
1523 
1528 
1529 public:
1530  private :
1531  // a csv content
1532  string content;
1533 
1534 public:
1535 
1537 
1542  constructor(*hash opts) ;
1543 
1544 
1546 
1552  constructor(hash spec, hash opts) ;
1553 
1554 
1555 
1556 private:
1557  initContent();
1558 public:
1559 
1560 
1561 
1562 private:
1563  writeRawLine(list values);
1564 public:
1565 
1566 
1568 
1577  string write(Qore::AbstractIterator iterator);
1578 
1579 
1581 
1590  string write(list l);
1591 
1592 
1594  string getContent();
1595 
1596  }; // CsvStringWriter
1597 }; // CsvUtil namespace
hash m_out_by_name
mapping output field by name
Definition: CsvUtil.qm.dox.h:1284
*list stat(string path)
the AbstractCsvIterator class is an abstract base class that allows abstract CSV data to be iterated ...
Definition: CsvUtil.qm.dox.h:641
Qore::File file
the file to write
Definition: CsvUtil.qm.dox.h:1480
const True
hash m_out_by_idx
mapping output field by index
Definition: CsvUtil.qm.dox.h:1287
const False
*code info_log
a closure/call reference for informational logging when using write(SQLStatement) ...
Definition: CsvUtil.qm.dox.h:1281
list list(...)
*hash< StatInfo > hstat(string path)
The CsvFileIterator class allows CSV files to be iterated on a record basis.
Definition: CsvUtil.qm.dox.h:1029
int index(softstring str, softstring substr, softint pos=0)
The CsvStringWriter class for in-memory string CSV creation.
Definition: CsvUtil.qm.dox.h:1527
The AbstractCsvWriter class provides a parent for all CSV writers.
Definition: CsvUtil.qm.dox.h:1220
string baseTemplate
base template for value format
Definition: CsvUtil.qm.dox.h:1272
const EOL_MACINTOSH
Old (pre-OSX) Macintosh end of line character sequence.
Definition: CsvUtil.qm.dox.h:392
const CSV_TYPE_UNKNOWN
Record type when not matching any type.
Definition: CsvUtil.qm.dox.h:398
const EOL_UNIX
Unix end of line character sequence (for new OS X too)
Definition: CsvUtil.qm.dox.h:388
StreamWriter output
the output stream for the CSV data
Definition: CsvUtil.qm.dox.h:1437
The CsvWriter class for safe CSV data creation.
Definition: CsvUtil.qm.dox.h:1432
The CsvIterator class allows CSV sources to be iterated on a record basis. The source of the input da...
Definition: CsvUtil.qm.dox.h:974
string type(auto arg)
const EOL_WIN
MS DOS/Windows end of line character sequence.
Definition: CsvUtil.qm.dox.h:390
The CsvDataIterator class allows arbitrary CSV string data to be iterated on a record basis...
Definition: CsvUtil.qm.dox.h:1084
string m_file_path
the path of the file being iterated
Definition: CsvUtil.qm.dox.h:1034
string encoding
output file character encoding
Definition: CsvUtil.qm.dox.h:1248
the CsvUtil namespace contains all the objects in the CsvUtil module
Definition: CsvUtil.qm.dox.h:386
The CsvFileWriter class for safe CSV file creation.
Definition: CsvUtil.qm.dox.h:1475
hash hash(object obj)
const CSV_TYPE_SINGLE
Record type when multi-type is disabled.
Definition: CsvUtil.qm.dox.h:400