View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2017, VU University Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(r_data,
   36	  [ r_data_frame/3,		% +Rvar, +Columns, :Goal
   37	    r_data_frame_from_rows/2,	% +RVar, +Rows
   38	    r_data_frame_from_dicts/2,	% +DataFrame, +Rows
   39
   40	    r_data_frame_to_dicts/2,	% +Rvar, -Dicts
   41	    r_data_frame_to_rows/3,	% +RVar, +Functor, -Rows
   42
   43	    r_data_frame_colnames/2,	% +RVars, -ColNames
   44	    r_data_frame_rownames/2	% +RVars, -RowNames
   45	  ]).   46:- use_module(r_call).   47:- use_module(library(apply)).   48:- use_module(library(error)).   49:- use_module(library(pairs)).   50:- use_module(library(lists)).   51
   52:- meta_predicate
   53	r_data_frame(+, +, 0).   54
   55/** <module> R data frame handling
   56
   57This library provides predicates  for  creating   and  fetching  R  data
   58frames. R data frames are typically  2-dimensional arrays where the data
   59is organised in _columns_. In  Prolog,   data  is typically organised in
   60_rows_ (or _records_).
   61*/
   62
   63%%	r_data_frame(+Rvar, +Columns, :Goal) is det.
   64%
   65%	Create an R data.frame from the solutions of Goal. The resulting
   66%	data frame is bound to the R variable Rvar.  For example:
   67%
   68%	```
   69%	?- r_data_frame(movieyear,
   70%		        [movie=Name, year=Year],
   71%			movie(Name, Year)).
   72%	```
   73%
   74%	@arg	Rvar is the name of the R output variable
   75%	@arg	Columns is a list Name=Var
   76
   77r_data_frame(RVar, ColSpec, Goal) :-
   78	must_be(atom, RVar),
   79	maplist(arg(1), ColSpec, Names),
   80	maplist(arg(2), ColSpec, Vars),
   81	Templ =.. [v|Vars],
   82	findall(Templ, Goal, Rows),
   83	r_data_frame_from_rows(RVar, Rows),
   84	colnames(RVar) <- Names.
   85
   86%%	r_data_frame_to_dicts(+DataFrame, -Dicts) is det.
   87%
   88%	Translate a DataFrame into a  list   of  dicts,  where each dict
   89%	represents a _row_. The  keys  of   the  dicts  are fetched from
   90%	`colnames(DataFrame)`.  For example:
   91%
   92%	```
   93%	?- r_data_frame_to_dicts(mtcars, Dicts).
   94%	Dicts = [ row{am:1, carb:4, cyl:6, disp:160.0, drat:3.9,
   95%		      gear:4, hp:110, mpg:21.0, qsec:16.46, vs:0,
   96%		      wt:2.62},
   97%		  ...
   98%		]
   99%	```
  100
  101r_data_frame_to_dicts(DataFrame, Dicts) :-
  102	Cols <- DataFrame,
  103	ColNameStrings <- colnames(DataFrame),
  104	maplist(atom_string, ColNames, ColNameStrings),
  105	pairs_keys_values(Pairs, ColNames, _),
  106	dict_pairs(Templ, _, Pairs),
  107	maplist(dict_cols(Templ, Dicts), ColNames, Cols).
  108
  109dict_cols(Templ, Dicts, Name, Col) :-
  110	maplist(fill_col(Templ, Name), Col, Dicts).
  111
  112fill_col(_, Name, Value, Dict) :-
  113	nonvar(Dict), !,
  114	get_dict(Name, Dict, Value).
  115fill_col(Templ, Name, Value, Dict) :-
  116	copy_term(Templ, Dict),
  117	get_dict(Name, Dict, Value).
  118
  119%%	r_data_frame_to_rows(+DataFrame, +Functor, -Rows) is det.
  120%
  121%	Translate a 2-dimensional R dataframe into   a  list of compound
  122%	terms, each representing a  row.  The   functor  of  each row is
  123%	Functor.  For example:
  124%
  125%	```
  126%	?- r_data_frame_to_rows(mtcars, car, Rows).
  127%	Rows = [ car(21.0, 6, 160.0, 110, 3.9, 2.62, 16.46, 0, 1, 4, 4),
  128%	         ...
  129%	       ].
  130%	```
  131
  132r_data_frame_to_rows(DataFrame, Functor, Rows) :-
  133	Cols <- DataFrame,
  134	length(Cols, Arity),
  135	term_cols(Cols, 1, Arity, Functor, Rows).
  136
  137term_cols([], _, _, _, _).
  138term_cols([Col|Cols], I, Arity, Functor, Rows) :-
  139	maplist(term_col(I, Arity, Functor), Col, Rows),
  140	I2 is I+1,
  141	term_cols(Cols, I2, Arity, Functor, Rows).
  142
  143term_col(1, Arity, Functor, Value, Term) :- !,
  144	functor(Term, Functor, Arity),
  145	arg(1, Term, Value).
  146term_col(I, _, _, Value, Term) :-
  147	arg(I, Term, Value).
  148
  149%%	r_data_frame_from_dicts(+DataFrame, +Rows) is det.
  150%
  151%	Assign the R variable DataFrame the content   of Rows. Rows is a
  152%	list of dicts that must all have the  same set of keys. The keys
  153%	are used as column names.
  154%
  155%	@see dicts_to_same_keys/3 to align the set of keys for each dict
  156
  157r_data_frame_from_dicts(DataFrame, Rows) :-
  158	must_be(atom, DataFrame),
  159	must_be(list, Rows),
  160	Rows = [Row1|_],
  161	dict_keys(Row1, Keys),
  162	dict_col_data(Keys, Rows, ColData),
  163	compound_name_arguments(Term, 'data.frame', ColData),
  164	DataFrame <- Term,
  165	colnames(DataFrame) <- Keys.
  166
  167dict_col_data([], _, []).
  168dict_col_data([K|Keys], Rows, [ColI|ColR]) :-
  169	maplist(get_dict(K), Rows, ColI),
  170	dict_col_data(Keys, Rows, ColR).
  171
  172%%	r_data_frame_from_rows(+DataFrame, +Rows) is det.
  173%
  174%	Assign the R variable DataFrame the content   of Rows. Rows is a
  175%	list of compound terms.
  176
  177r_data_frame_from_rows(DataFrame, Rows) :-
  178	must_be(atom, DataFrame),
  179	must_be(list, Rows),
  180	Rows = [Row1|_],
  181	functor(Row1, _, NCols),
  182	col_data(1, NCols, Rows, ColData),
  183	append(ColData, [stringsAsFactors = 'FALSE'], ColDataOpts),
  184	compound_name_arguments(Term, 'data.frame', ColDataOpts),
  185	DataFrame <- Term.
  186
  187col_data(I, NCols, Rows, [ColI|ColR]) :-
  188	I =< NCols, !,
  189	maplist(arg(I), Rows, ColI),
  190	I2 is I + 1,
  191	col_data(I2, NCols, Rows, ColR).
  192col_data(_, _, _, []).
  193
  194%%	r_data_frame_colnames(+DataFrame, -ColNames:list(atom)) is det.
  195%
  196%	ColNames are the column names for DataFrame as a list of atoms.
  197
  198r_data_frame_colnames(DataFrame, ColNames) :-
  199	ColNameStrings <- colnames(DataFrame),
  200	maplist(atom_string, ColNames, ColNameStrings).
  201
  202%%	r_data_frame_rownames(+DataFrame, -RowNames:list(atom)) is det.
  203%
  204%	RowNames are the row names for DataFrame as a list of atoms.
  205
  206r_data_frame_rownames(DataFrame, RowNames) :-
  207	RowNameStrings <- rownames(DataFrame),
  208	maplist(atom_string, RowNames, RowNameStrings)