35
36:- module(csv,
37 [ csv//1, 38 csv//2, 39
40 csv_read_file/2, 41 csv_read_file/3, 42 csv_read_stream/3, 43
44 csv_read_file_row/3, 45 csv_read_row/3, 46 csv_options/2, 47
48 csv_write_file/2, 49 csv_write_file/3, 50 csv_write_stream/3 51 ]). 52:- use_module(library(record),[(record)/1, op(_,_,record)]). 53
54:- autoload(library(apply),[maplist/2]). 55:- autoload(library(debug),[debug/3]). 56:- autoload(library(error),[must_be/2,domain_error/2]). 57:- autoload(library(lists),[append/3]). 58:- autoload(library(option),[option/2,select_option/4]). 59:- autoload(library(pure_input),
60 [phrase_from_file/3,phrase_from_stream/2]). 61:- autoload(library(readutil),[read_line_to_codes/2]). 62:- autoload(library(dcg/basics),[string//1,eos//0]). 63
64
77
78:- predicate_options(csv//2, 2,
79 [ separator(nonneg), 80 strip(boolean),
81 ignore_quotes(boolean),
82 convert(boolean),
83 case(oneof([down,preserve,up])),
84 functor(atom),
85 arity(-nonneg), 86 match_arity(boolean)
87 ]). 88:- predicate_options(csv_read_file/3, 3,
89 [ pass_to(csv//2, 2),
90 pass_to(phrase_from_file/3, 3)
91 ]). 92:- predicate_options(csv_read_file_row/3, 3,
93 [ pass_to(csv//2, 2),
94 pass_to(open/4, 4)
95 ]). 96:- predicate_options(csv_write_file/3, 3,
97 [ pass_to(csv//2, 2),
98 pass_to(open/4, 4)
99 ]). 100:- predicate_options(csv_write_stream/3, 3,
101 [ pass_to(csv//2, 2)
102 ]). 103
104
105:- record
106 csv_options(separator:integer=0',,
107 strip:boolean=false,
108 ignore_quotes:boolean=false,
109 convert:boolean=true,
110 case:oneof([down,preserve,up])=preserve,
111 functor:atom=row,
112 arity:integer,
113 match_arity:boolean=true,
114 skip_header:atom). 115
116
136
137
138csv_read_file(File, Rows) :-
139 csv_read_file(File, Rows, []).
140
141csv_read_file(File, Rows, Options) :-
142 default_separator(File, Options, Options1),
143 make_csv_options(Options1, Record, RestOptions),
144 phrase_from_file(csv_roptions(Rows, Record), File, RestOptions).
145
146
147default_separator(File, Options0, Options) :-
148 ( option(separator(_), Options0)
149 -> Options = Options0
150 ; file_name_extension(_, Ext0, File),
151 downcase_atom(Ext0, Ext),
152 ext_separator(Ext, Sep)
153 -> Options = [separator(Sep)|Options0]
154 ; Options = Options0
155 ).
156
157ext_separator(csv, 0',).
158ext_separator(tsv, 0'\t).
159
160
164
165csv_read_stream(Stream, Rows, Options) :-
166 make_csv_options(Options, Record, _),
167 phrase_from_stream(csv_roptions(Rows, Record), Stream).
168
169
218
219csv(Rows) -->
220 csv(Rows, []).
221
222csv(Rows, Options) -->
223 { make_csv_options(Options, Record, _) },
224 csv_roptions(Rows, Record).
225
226csv_roptions(Rows, Record) -->
227 { ground(Rows) },
228 !,
229 emit_csv(Rows, Record).
230csv_roptions(Rows, Record) -->
231 skip_header(Record),
232 csv_data(Rows, Record).
233
(Options) -->
235 { csv_options_skip_header(Options, CommentStart),
236 nonvar(CommentStart),
237 atom_codes(CommentStart, Codes)
238 },
239 !,
240 skip_header_lines(Codes),
241 skip_blank_lines.
242skip_header(_) -->
243 [].
244
(CommentStart) -->
246 string(CommentStart),
247 !,
248 ( string(_Comment),
249 end_of_record
250 -> skip_header_lines(CommentStart)
251 ).
252skip_header_lines(_) -->
253 [].
254
255skip_blank_lines -->
256 eos,
257 !.
258skip_blank_lines -->
259 end_of_record,
260 !,
261 skip_blank_lines.
262skip_blank_lines -->
263 [].
264
265csv_data([], _) -->
266 eos,
267 !.
268csv_data([Row|More], Options) -->
269 row(Row, Options),
270 !,
271 { debug(csv, 'Row: ~p', [Row]) },
272 csv_data(More, Options).
273
274
275row(Row, Options) -->
276 fields(Fields, Options),
277 { csv_options_functor(Options, Functor),
278 Row =.. [Functor|Fields],
279 functor(Row, _, Arity),
280 check_arity(Options, Arity)
281 }.
282
283check_arity(Options, Arity) :-
284 csv_options_arity(Options, Arity),
285 !.
286check_arity(Options, _) :-
287 csv_options_match_arity(Options, false),
288 !.
289check_arity(Options, Arity) :-
290 csv_options_arity(Options, Expected),
291 domain_error(row_arity(Expected), Arity).
292
293fields([F|T], Options) -->
294 field(F, Options),
295 ( separator(Options)
296 -> fields(T, Options)
297 ; end_of_record
298 -> { T = [] }
299 ).
300
301field(Value, Options) -->
302 "\"",
303 { csv_options_ignore_quotes(Options, false) },
304 !,
305 string_codes(Codes),
306 { make_value(Codes, Value, Options) }.
307field(Value, Options) -->
308 { csv_options_strip(Options, true) },
309 !,
310 stripped_field(Value, Options).
311field(Value, Options) -->
312 { csv_options_separator(Options, Sep) },
313 field_codes(Codes, Sep),
314 { make_value(Codes, Value, Options) }.
315
316
317stripped_field(Value, Options) -->
318 ws,
319 ( "\"",
320 { csv_options_strip(Options, false) }
321 -> string_codes(Codes),
322 ws
323 ; { csv_options_separator(Options, Sep) },
324 field_codes(Codes0, Sep),
325 { strip_trailing_ws(Codes0, Codes) }
326 ),
327 { make_value(Codes, Value, Options) }.
328
329ws --> " ", !, ws.
330ws --> "\t", !, ws.
331ws --> "".
332
333strip_trailing_ws(List, Stripped) :-
334 append(Stripped, WS, List),
335 all_ws(WS).
336
337all_ws([]).
338all_ws([32|T]) :- all_ws(T).
339all_ws([9|T]) :- all_ws(T).
340
341
346
347string_codes(List) -->
348 [H],
349 ( { H == 0'" }
350 -> ( "\""
351 -> { List = [H|T] },
352 string_codes(T)
353 ; { List = [] }
354 )
355 ; { List = [H|T] },
356 string_codes(T)
357 ).
358
359field_codes([], Sep), [Sep] --> [Sep], !.
360field_codes([], _), "\n" --> "\r\n", !.
361field_codes([], _), "\n" --> "\n", !.
362field_codes([], _), "\n" --> "\r", !.
363field_codes([H|T], Sep) --> [H], !, field_codes(T, Sep).
364field_codes([], _) --> []. 365
370
371make_value(Codes, Value, Options) :-
372 csv_options_convert(Options, Convert),
373 csv_options_case(Options, Case),
374 make_value(Convert, Case, Codes, Value).
375
376make_value(true, preserve, Codes, Value) :-
377 !,
378 name(Value, Codes).
379make_value(true, Case, Codes, Value) :-
380 !,
381 ( number_string(Value, Codes)
382 -> true
383 ; make_value(false, Case, Codes, Value)
384 ).
385make_value(false, preserve, Codes, Value) :-
386 !,
387 atom_codes(Value, Codes).
388make_value(false, down, Codes, Value) :-
389 !,
390 string_codes(String, Codes),
391 downcase_atom(String, Value).
392make_value(false, up, Codes, Value) :-
393 string_codes(String, Codes),
394 upcase_atom(String, Value).
395
396separator(Options) -->
397 { csv_options_separator(Options, Sep) },
398 [Sep].
399
400end_of_record --> "\n". 401end_of_record --> "\r\n". 402end_of_record --> "\r". 403end_of_record --> eos. 404
405
421
422csv_read_file_row(File, Row, Options) :-
423 default_separator(File, Options, Options1),
424 make_csv_options(Options1, RecordOptions, Options2),
425 select_option(line(Line), Options2, RestOptions, _),
426 setup_call_cleanup(
427 open(File, read, Stream, RestOptions),
428 csv_read_stream_row(Stream, Row, Line, RecordOptions),
429 close(Stream)).
430
431csv_read_stream_row(Stream, Row, Line, Options) :-
432 between(1, infinite, Line),
433 ( csv_read_row(Stream, Row0, Options),
434 Row0 \== end_of_file
435 -> Row = Row0
436 ; !,
437 fail
438 ).
439
440
447
448csv_read_row(Stream, Row, _Record) :-
449 at_end_of_stream(Stream),
450 !,
451 Row = end_of_file.
452csv_read_row(Stream, Row, Record) :-
453 read_lines_to_codes(Stream, Codes, Record, even),
454 phrase(row(Row0, Record), Codes),
455 !,
456 Row = Row0.
457
458read_lines_to_codes(Stream, Codes, Options, QuoteQuantity) :-
459 read_line_to_codes(Stream, Codes0),
460 Codes0 \== end_of_file,
461 ( ( csv_options_ignore_quotes(Options, true)
462 ; check_quotes(Codes0, QuoteQuantity, even)
463 )
464 -> Codes = Codes0
465 ; append(Codes0, [0'\n|Tail], Codes),
466 read_lines_to_codes(Stream, Tail, Options, odd)
467 ).
468
469check_quotes([], QuoteQuantity, QuoteQuantity) :-
470 !.
471check_quotes([0'"|T], odd, Result) :-
472 !,
473 check_quotes(T, even, Result).
474check_quotes([0'"|T], even, Result) :-
475 !,
476 check_quotes(T, odd, Result).
477check_quotes([_|T], QuoteQuantity, Result) :-
478 check_quotes(T, QuoteQuantity, Result).
479
480
487
488csv_options(Compiled, Options) :-
489 make_csv_options(Options, Compiled, _Ignored).
490
491
492 495
503
504csv_write_file(File, Data) :-
505 csv_write_file(File, Data, []).
506
507csv_write_file(File, Data, Options) :-
508 must_be(list, Data),
509 default_separator(File, Options, Options1),
510 make_csv_options(Options1, OptionsRecord, RestOptions),
511 setup_call_cleanup(
512 open(File, write, Out, RestOptions),
513 maplist(csv_write_row(Out, OptionsRecord), Data),
514 close(Out)).
515
516csv_write_row(Out, OptionsRecord, Row) :-
517 phrase(emit_row(Row, OptionsRecord), String),
518 format(Out, '~s', [String]).
519
520emit_csv([], _) --> [].
521emit_csv([H|T], Options) -->
522 emit_row(H, Options),
523 emit_csv(T, Options).
524
525emit_row(Row, Options) -->
526 { Row =.. [_|Fields] },
527 emit_fields(Fields, Options),
528 "\r\n". 529
530emit_fields([], _) -->
531 "".
532emit_fields([H|T], Options) -->
533 emit_field(H, Options),
534 ( { T == [] }
535 -> []
536 ; { csv_options_separator(Options, Sep) },
537 [Sep],
538 emit_fields(T, Options)
539 ).
540
541emit_field(H, Options) -->
542 { ( atom(H)
543 -> atom_codes(H, Codes)
544 ; string(H)
545 -> string_codes(H, Codes)
546 )
547 },
548 !,
549 ( { needs_quotes(H, Options) }
550 -> "\"", emit_string(Codes), "\""
551 ; emit_codes(Codes)
552 ).
553emit_field([], _) -->
554 !,
555 { atom_codes('[]', Codes) },
556 emit_codes(Codes).
557emit_field(H, _) -->
558 { number_codes(H,Codes) },
559 emit_codes(Codes).
560
561needs_quotes(Atom, _) :-
562 sub_atom(Atom, _, _, _, '"'),
563 !.
564needs_quotes(Atom, _) :-
565 sub_atom(Atom, _, _, _, '\n'),
566 !.
567needs_quotes(Atom, _) :-
568 sub_atom(Atom, _, _, _, '\r'),
569 !.
570needs_quotes(Atom, Options) :-
571 csv_options_separator(Options, Sep),
572 char_code(Char, Sep),
573 sub_atom(Atom, _, _, _, Char),
574 !.
575
576emit_string([]) --> "".
577emit_string([0'"|T]) --> !, "\"\"", emit_string(T).
578emit_string([H|T]) --> [H], emit_string(T).
579
580emit_codes([]) --> "".
581emit_codes([0'"|T]) --> !, "\"\"", emit_codes(T).
582emit_codes([H|T]) --> [H], emit_codes(T).
583
584
600
601csv_write_stream(Stream, Data, Options) :-
602 must_be(list, Data),
603 make_csv_options(Options, OptionsRecord, _),
604 maplist(csv_write_row(Stream, OptionsRecord), Data)