1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2009-2018, VU University Amsterdam 7 CWI, Amsterdam 8 All rights reserved. 9 10 Redistribution and use in source and binary forms, with or without 11 modification, are permitted provided that the following conditions 12 are met: 13 14 1. Redistributions of source code must retain the above copyright 15 notice, this list of conditions and the following disclaimer. 16 17 2. Redistributions in binary form must reproduce the above copyright 18 notice, this list of conditions and the following disclaimer in 19 the documentation and/or other materials provided with the 20 distribution. 21 22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 32 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 POSSIBILITY OF SUCH DAMAGE. 34*/ 35 36:- module(csv, 37 [ csv//1, % +Rows 38 csv//2, % +Rows, +Options 39 40 csv_read_file/2, % +File, -Data 41 csv_read_file/3, % +File, -Data, +Options 42 csv_read_stream/3, % +Stream, -Data, +Options 43 44 csv_read_file_row/3, % +File, -Row, +Options 45 csv_read_row/3, % +Stream, -Row, +CompiledOptions 46 csv_options/2, % -Compiled, +Options 47 48 csv_write_file/2, % +File, +Data 49 csv_write_file/3, % +File, +Data, +Options 50 csv_write_stream/3 % +Stream, +Data, +Options 51 ]). 52:- use_module(library(record),[(record)/1, op(_,_,record)]). 53 54:- autoload(library(apply),[maplist/2]). 55:- autoload(library(debug),[debug/3]). 56:- autoload(library(error),[must_be/2,domain_error/2]). 57:- autoload(library(lists),[append/3]). 58:- autoload(library(option),[option/2,select_option/4]). 59:- autoload(library(pure_input), 60 [phrase_from_file/3,phrase_from_stream/2]). 61:- autoload(library(readutil),[read_line_to_codes/2]). 62:- autoload(library(dcg/basics),[string//1,eos//0]).
78:- predicate_options(csv//2, 2, 79 [ separator(nonneg), % mustv be code 80 strip(boolean), 81 ignore_quotes(boolean), 82 convert(boolean), 83 case(oneof([down,preserve,up])), 84 functor(atom), 85 arity(-nonneg), % actually ?nonneg 86 match_arity(boolean) 87 ]). 88:- predicate_options(csv_read_file/3, 3, 89 [ pass_to(csv//2, 2), 90 pass_to(phrase_from_file/3, 3) 91 ]). 92:- predicate_options(csv_read_file_row/3, 3, 93 [ pass_to(csv//2, 2), 94 pass_to(open/4, 4) 95 ]). 96:- predicate_options(csv_write_file/3, 3, 97 [ pass_to(csv//2, 2), 98 pass_to(open/4, 4) 99 ]). 100:- predicate_options(csv_write_stream/3, 3, 101 [ pass_to(csv//2, 2) 102 ]). 103 104 105:- record 106 csv_options(separator:integer=0',, 107 strip:boolean=false, 108 ignore_quotes:boolean=false, 109 convert:boolean=true, 110 case:oneof([down,preserve,up])=preserve, 111 functor:atom=row, 112 arity:integer, 113 match_arity:boolean=true, 114 skip_header:atom).
\t
for
.tsv
files and ,
otherwise.
Suppose we want to create a predicate table/6 from a CSV file
that we know contains 6 fields per record. This can be done
using the code below. Without the option arity(6)
, this would
generate a predicate table/N, where N is the number of fields
per record in the data.
?- csv_read_file(File, Rows, [functor(table), arity(6)]), maplist(assert, Rows).
138csv_read_file(File, Rows) :- 139 csv_read_file(File, Rows, []). 140 141csv_read_file(File, Rows, Options) :- 142 default_separator(File, Options, Options1), 143 make_csv_options(Options1, Record, RestOptions), 144 phrase_from_file(csv_roptions(Rows, Record), File, RestOptions). 145 146 147default_separator(File, Options0, Options) :- 148 ( option(separator(_), Options0) 149 -> Options = Options0 150 ; file_name_extension(_, Ext0, File), 151 downcase_atom(Ext0, Ext), 152 ext_separator(Ext, Sep) 153 -> Options = [separator(Sep)|Options0] 154 ; Options = Options0 155 ). 156 157ext_separator(csv, 0',). 158ext_separator(tsv, 0'\t).
165csv_read_stream(Stream, Rows, Options) :-
166 make_csv_options(Options, Record, _),
167 phrase_from_stream(csv_roptions(Rows, Record), Stream).
separator(0';)
parses
a semicolon separated file.true
(default false), threat double quotes as a normal
character.true
(default false
), strip leading and trailing
blank space. RFC4180 says that blank space is part of the
data.#
. After
skipping comment lines this option causes csv//2 to skip empty
lines. Note that an empty line may not contain white space
characters (space or tab) as these may provide valid data.true
(default), use name/2 on the field data. This
translates the field into a number if possible.down
, downcase atomic values. If up
, upcase them
and if preserve
(default), do not change the case.row
.domain_error(row_arity(Expected), Found)
if a row is
found with different arity.false
(default true
), do not reject CSV files where
lines provide a varying number of fields (columns). This
can be a work-around to use some incorrect CSV files.219csv(Rows) --> 220 csv(Rows, []). 221 222csv(Rows, Options) --> 223 { make_csv_options(Options, Record, _) }, 224 csv_roptions(Rows, Record). 225 226csv_roptions(Rows, Record) --> 227 { ground(Rows) }, 228 !, 229 emit_csv(Rows, Record). 230csv_roptions(Rows, Record) --> 231 skip_header(Record), 232 csv_data(Rows, Record). 233 234skip_header(Options) --> 235 { csv_options_skip_header(Options, CommentStart), 236 nonvar(CommentStart), 237 atom_codes(CommentStart, Codes) 238 }, 239 !, 240 skip_header_lines(Codes), 241 skip_blank_lines. 242skip_header(_) --> 243 []. 244 245skip_header_lines(CommentStart) --> 246 string(CommentStart), 247 !, 248 ( string(_Comment), 249 end_of_record 250 -> skip_header_lines(CommentStart) 251 ). 252skip_header_lines(_) --> 253 []. 254 255skip_blank_lines --> 256 eos, 257 !. 258skip_blank_lines --> 259 end_of_record, 260 !, 261 skip_blank_lines. 262skip_blank_lines --> 263 []. 264 265csv_data([], _) --> 266 eos, 267 !. 268csv_data([Row|More], Options) --> 269 row(Row, Options), 270 !, 271 { debug(csv, 'Row: ~p', [Row]) }, 272 csv_data(More, Options). 273 274 275row(Row, Options) --> 276 fields(Fields, Options), 277 { csv_options_functor(Options, Functor), 278 Row =.. [Functor|Fields], 279 functor(Row, _, Arity), 280 check_arity(Options, Arity) 281 }. 282 283check_arity(Options, Arity) :- 284 csv_options_arity(Options, Arity), 285 !. 286check_arity(Options, _) :- 287 csv_options_match_arity(Options, false), 288 !. 289check_arity(Options, Arity) :- 290 csv_options_arity(Options, Expected), 291 domain_error(row_arity(Expected), Arity). 292 293fields([F|T], Options) --> 294 field(F, Options), 295 ( separator(Options) 296 -> fields(T, Options) 297 ; end_of_record 298 -> { T = [] } 299 ). 300 301field(Value, Options) --> 302 "\"", 303 { csv_options_ignore_quotes(Options, false) }, 304 !, 305 string_codes(Codes), 306 { make_value(Codes, Value, Options) }. 307field(Value, Options) --> 308 { csv_options_strip(Options, true) }, 309 !, 310 stripped_field(Value, Options). 311field(Value, Options) --> 312 { csv_options_separator(Options, Sep) }, 313 field_codes(Codes, Sep), 314 { make_value(Codes, Value, Options) }. 315 316 317stripped_field(Value, Options) --> 318 ws, 319 ( "\"", 320 { csv_options_strip(Options, false) } 321 -> string_codes(Codes), 322 ws 323 ; { csv_options_separator(Options, Sep) }, 324 field_codes(Codes0, Sep), 325 { strip_trailing_ws(Codes0, Codes) } 326 ), 327 { make_value(Codes, Value, Options) }. 328 329ws --> " ", !, ws. 330ws --> "\t", !, ws. 331ws --> "". 332 333strip_trailing_ws(List, Stripped) :- 334 append(Stripped, WS, List), 335 all_ws(WS). 336 337all_ws([]). 338all_ws([32|T]) :- all_ws(T). 339all_ws([9|T]) :- all_ws(T).
347string_codes(List) --> 348 [H], 349 ( { H == 0'" } 350 -> ( "\"" 351 -> { List = [H|T] }, 352 string_codes(T) 353 ; { List = [] } 354 ) 355 ; { List = [H|T] }, 356 string_codes(T) 357 ). 358 359field_codes([], Sep), [Sep] --> [Sep], !. 360field_codes([], _), "\n" --> "\r\n", !. 361field_codes([], _), "\n" --> "\n", !. 362field_codes([], _), "\n" --> "\r", !. 363field_codes([H|T], Sep) --> [H], !, field_codes(T, Sep). 364field_codes([], _) --> []. % unterminated last record
371make_value(Codes, Value, Options) :- 372 csv_options_convert(Options, Convert), 373 csv_options_case(Options, Case), 374 make_value(Convert, Case, Codes, Value). 375 376make_value(true, preserve, Codes, Value) :- 377 !, 378 name(Value, Codes). 379make_value(true, Case, Codes, Value) :- 380 !, 381 ( number_string(Value, Codes) 382 -> true 383 ; make_value(false, Case, Codes, Value) 384 ). 385make_value(false, preserve, Codes, Value) :- 386 !, 387 atom_codes(Value, Codes). 388make_value(false, down, Codes, Value) :- 389 !, 390 string_codes(String, Codes), 391 downcase_atom(String, Value). 392make_value(false, up, Codes, Value) :- 393 string_codes(String, Codes), 394 upcase_atom(String, Value). 395 396separator(Options) --> 397 { csv_options_separator(Options, Sep) }, 398 [Sep]. 399 400end_of_record --> "\n". % Unix files 401end_of_record --> "\r\n". % DOS files 402end_of_record --> "\r". % MacOS files 403end_of_record --> eos. % unterminated last record
In addition to the options of csv_read_file/3, this predicate processes the option:
422csv_read_file_row(File, Row, Options) :- 423 default_separator(File, Options, Options1), 424 make_csv_options(Options1, RecordOptions, Options2), 425 select_option(line(Line), Options2, RestOptions, _), 426 setup_call_cleanup( 427 open(File, read, Stream, RestOptions), 428 csv_read_stream_row(Stream, Row, Line, RecordOptions), 429 close(Stream)). 430 431csv_read_stream_row(Stream, Row, Line, Options) :- 432 between(1, infinite, Line), 433 ( csv_read_row(Stream, Row0, Options), 434 Row0 \== end_of_file 435 -> Row = Row0 436 ; !, 437 fail 438 ).
end_of_file
upon reaching the
end of the input.448csv_read_row(Stream, Row, _Record) :- 449 at_end_of_stream(Stream), 450 !, 451 Row = end_of_file. 452csv_read_row(Stream, Row, Record) :- 453 read_lines_to_codes(Stream, Codes, Record, even), 454 phrase(row(Row0, Record), Codes), 455 !, 456 Row = Row0. 457 458read_lines_to_codes(Stream, Codes, Options, QuoteQuantity) :- 459 read_line_to_codes(Stream, Codes0), 460 Codes0 \== end_of_file, 461 ( ( csv_options_ignore_quotes(Options, true) 462 ; check_quotes(Codes0, QuoteQuantity, even) 463 ) 464 -> Codes = Codes0 465 ; append(Codes0, [0'\n|Tail], Codes), 466 read_lines_to_codes(Stream, Tail, Options, odd) 467 ). 468 469check_quotes([], QuoteQuantity, QuoteQuantity) :- 470 !. 471check_quotes([0'"|T], odd, Result) :- 472 !, 473 check_quotes(T, even, Result). 474check_quotes([0'"|T], even, Result) :- 475 !, 476 check_quotes(T, odd, Result). 477check_quotes([_|T], QuoteQuantity, Result) :- 478 check_quotes(T, QuoteQuantity, Result).
488csv_options(Compiled, Options) :- 489 make_csv_options(Options, Compiled, _Ignored). 490 491 492 /******************************* 493 * OUTPUT * 494 *******************************/
\t
for
.tsv
files and ,
otherwise.504csv_write_file(File, Data) :- 505 csv_write_file(File, Data, []). 506 507csv_write_file(File, Data, Options) :- 508 must_be(list, Data), 509 default_separator(File, Options, Options1), 510 make_csv_options(Options1, OptionsRecord, RestOptions), 511 setup_call_cleanup( 512 open(File, write, Out, RestOptions), 513 maplist(csv_write_row(Out, OptionsRecord), Data), 514 close(Out)). 515 516csv_write_row(Out, OptionsRecord, Row) :- 517 phrase(emit_row(Row, OptionsRecord), String), 518 format(Out, '~s', [String]). 519 520emit_csv([], _) --> []. 521emit_csv([H|T], Options) --> 522 emit_row(H, Options), 523 emit_csv(T, Options). 524 525emit_row(Row, Options) --> 526 { Row =.. [_|Fields] }, 527 emit_fields(Fields, Options), 528 "\r\n". % RFC 4180 demands \r\n 529 530emit_fields([], _) --> 531 "". 532emit_fields([H|T], Options) --> 533 emit_field(H, Options), 534 ( { T == [] } 535 -> [] 536 ; { csv_options_separator(Options, Sep) }, 537 [Sep], 538 emit_fields(T, Options) 539 ). 540 541emit_field(H, Options) --> 542 { ( atom(H) 543 -> atom_codes(H, Codes) 544 ; string(H) 545 -> string_codes(H, Codes) 546 ) 547 }, 548 !, 549 ( { needs_quotes(H, Options) } 550 -> "\"", emit_string(Codes), "\"" 551 ; emit_codes(Codes) 552 ). 553emit_field([], _) --> 554 !, 555 { atom_codes('[]', Codes) }, 556 emit_codes(Codes). 557emit_field(H, _) --> 558 { number_codes(H,Codes) }, 559 emit_codes(Codes). 560 561needs_quotes(Atom, _) :- 562 sub_atom(Atom, _, _, _, '"'), 563 !. 564needs_quotes(Atom, _) :- 565 sub_atom(Atom, _, _, _, '\n'), 566 !. 567needs_quotes(Atom, _) :- 568 sub_atom(Atom, _, _, _, '\r'), 569 !. 570needs_quotes(Atom, Options) :- 571 csv_options_separator(Options, Sep), 572 char_code(Char, Sep), 573 sub_atom(Atom, _, _, _, Char), 574 !. 575 576emit_string([]) --> "". 577emit_string([0'"|T]) --> !, "\"\"", emit_string(T). 578emit_string([H|T]) --> [H], emit_string(T). 579 580emit_codes([]) --> "". 581emit_codes([0'"|T]) --> !, "\"\"", emit_codes(T). 582emit_codes([H|T]) --> [H], emit_codes(T).
save_data(File) :- setup_call_cleanup( open(File, write, Out), forall(data(C1,C2,C3), csv_write_stream(Out, [row(C1,C2,C3)], [])), close(Out)),
601csv_write_stream(Stream, Data, Options) :-
602 must_be(list, Data),
603 make_csv_options(Options, OptionsRecord, _),
604 maplist(csv_write_row(Stream, OptionsRecord), Data)
Process CSV (Comma-Separated Values) data
This library parses and generates CSV data. CSV data is represented in Prolog as a list of rows. Each row is a compound term, where all rows have the same name and arity.