1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2017, VU University Amsterdam 7 All rights reserved. 8 9 Redistribution and use in source and binary forms, with or without 10 modification, are permitted provided that the following conditions 11 are met: 12 13 1. Redistributions of source code must retain the above copyright 14 notice, this list of conditions and the following disclaimer. 15 16 2. Redistributions in binary form must reproduce the above copyright 17 notice, this list of conditions and the following disclaimer in 18 the documentation and/or other materials provided with the 19 distribution. 20 21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 POSSIBILITY OF SUCH DAMAGE. 33*/ 34 35:- module(r_data, 36 [ r_data_frame/3, % +Rvar, +Columns, :Goal 37 r_data_frame_from_rows/2, % +RVar, +Rows 38 r_data_frame_from_dicts/2, % +DataFrame, +Rows 39 40 r_data_frame_to_dicts/2, % +Rvar, -Dicts 41 r_data_frame_to_rows/3, % +RVar, +Functor, -Rows 42 43 r_data_frame_colnames/2, % +RVars, -ColNames 44 r_data_frame_rownames/2 % +RVars, -RowNames 45 ]). 46:- use_module(r_call). 47:- use_module(library(apply)). 48:- use_module(library(error)). 49:- use_module(library(pairs)). 50:- use_module(library(lists)). 51 52:- meta_predicate 53 r_data_frame( , , ). 54 55/** <module> R data frame handling 56 57This library provides predicates for creating and fetching R data 58frames. R data frames are typically 2-dimensional arrays where the data 59is organised in _columns_. In Prolog, data is typically organised in 60_rows_ (or _records_). 61*/ 62 63%% r_data_frame(+Rvar, +Columns, :Goal) is det. 64% 65% Create an R data.frame from the solutions of Goal. The resulting 66% data frame is bound to the R variable Rvar. For example: 67% 68% ``` 69% ?- r_data_frame(movieyear, 70% [movie=Name, year=Year], 71% movie(Name, Year)). 72% ``` 73% 74% @arg Rvar is the name of the R output variable 75% @arg Columns is a list Name=Var 76 77r_data_frame(RVar, ColSpec, Goal) :- 78 must_be(atom, RVar), 79 maplist(arg(1), ColSpec, Names), 80 maplist(arg(2), ColSpec, Vars), 81 Templ =.. [v|Vars], 82 findall(Templ, Goal, Rows), 83 r_data_frame_from_rows(RVar, Rows), 84 colnames(RVar) <- Names. 85 86%% r_data_frame_to_dicts(+DataFrame, -Dicts) is det. 87% 88% Translate a DataFrame into a list of dicts, where each dict 89% represents a _row_. The keys of the dicts are fetched from 90% `colnames(DataFrame)`. For example: 91% 92% ``` 93% ?- r_data_frame_to_dicts(mtcars, Dicts). 94% Dicts = [ row{am:1, carb:4, cyl:6, disp:160.0, drat:3.9, 95% gear:4, hp:110, mpg:21.0, qsec:16.46, vs:0, 96% wt:2.62}, 97% ... 98% ] 99% ``` 100 101r_data_frame_to_dicts(DataFrame, Dicts) :- 102 Cols <- DataFrame, 103 ColNameStrings <- colnames(DataFrame), 104 maplist(atom_string, ColNames, ColNameStrings), 105 pairs_keys_values(Pairs, ColNames, _), 106 dict_pairs(Templ, _, Pairs), 107 maplist(dict_cols(Templ, Dicts), ColNames, Cols). 108 109dict_cols(Templ, Dicts, Name, Col) :- 110 maplist(fill_col(Templ, Name), Col, Dicts). 111 112fill_col(_, Name, Value, Dict) :- 113 nonvar(Dict), !, 114 get_dict(Name, Dict, Value). 115fill_col(Templ, Name, Value, Dict) :- 116 copy_term(Templ, Dict), 117 get_dict(Name, Dict, Value). 118 119%% r_data_frame_to_rows(+DataFrame, +Functor, -Rows) is det. 120% 121% Translate a 2-dimensional R dataframe into a list of compound 122% terms, each representing a row. The functor of each row is 123% Functor. For example: 124% 125% ``` 126% ?- r_data_frame_to_rows(mtcars, car, Rows). 127% Rows = [ car(21.0, 6, 160.0, 110, 3.9, 2.62, 16.46, 0, 1, 4, 4), 128% ... 129% ]. 130% ``` 131 132r_data_frame_to_rows(DataFrame, Functor, Rows) :- 133 Cols <- DataFrame, 134 length(Cols, Arity), 135 term_cols(Cols, 1, Arity, Functor, Rows). 136 137term_cols([], _, _, _, _). 138term_cols([Col|Cols], I, Arity, Functor, Rows) :- 139 maplist(term_col(I, Arity, Functor), Col, Rows), 140 I2 is I+1, 141 term_cols(Cols, I2, Arity, Functor, Rows). 142 143term_col(1, Arity, Functor, Value, Term) :- !, 144 functor(Term, Functor, Arity), 145 arg(1, Term, Value). 146term_col(I, _, _, Value, Term) :- 147 arg(I, Term, Value). 148 149%% r_data_frame_from_dicts(+DataFrame, +Rows) is det. 150% 151% Assign the R variable DataFrame the content of Rows. Rows is a 152% list of dicts that must all have the same set of keys. The keys 153% are used as column names. 154% 155% @see dicts_to_same_keys/3 to align the set of keys for each dict 156 157r_data_frame_from_dicts(DataFrame, Rows) :- 158 must_be(atom, DataFrame), 159 must_be(list, Rows), 160 Rows = [Row1|_], 161 dict_keys(Row1, Keys), 162 dict_col_data(Keys, Rows, ColData), 163 compound_name_arguments(Term, 'data.frame', ColData), 164 DataFrame <- Term, 165 colnames(DataFrame) <- Keys. 166 167dict_col_data([], _, []). 168dict_col_data([K|Keys], Rows, [ColI|ColR]) :- 169 maplist(get_dict(K), Rows, ColI), 170 dict_col_data(Keys, Rows, ColR). 171 172%% r_data_frame_from_rows(+DataFrame, +Rows) is det. 173% 174% Assign the R variable DataFrame the content of Rows. Rows is a 175% list of compound terms. 176 177r_data_frame_from_rows(DataFrame, Rows) :- 178 must_be(atom, DataFrame), 179 must_be(list, Rows), 180 Rows = [Row1|_], 181 functor(Row1, _, NCols), 182 col_data(1, NCols, Rows, ColData), 183 append(ColData, [stringsAsFactors = 'FALSE'], ColDataOpts), 184 compound_name_arguments(Term, 'data.frame', ColDataOpts), 185 DataFrame <- Term. 186 187col_data(I, NCols, Rows, [ColI|ColR]) :- 188 I =< NCols, !, 189 maplist(arg(I), Rows, ColI), 190 I2 is I + 1, 191 col_data(I2, NCols, Rows, ColR). 192col_data(_, _, _, []). 193 194%% r_data_frame_colnames(+DataFrame, -ColNames:list(atom)) is det. 195% 196% ColNames are the column names for DataFrame as a list of atoms. 197 198r_data_frame_colnames(DataFrame, ColNames) :- 199 ColNameStrings <- colnames(DataFrame), 200 maplist(atom_string, ColNames, ColNameStrings). 201 202%% r_data_frame_rownames(+DataFrame, -RowNames:list(atom)) is det. 203% 204% RowNames are the row names for DataFrame as a list of atoms. 205 206r_data_frame_rownames(DataFrame, RowNames) :- 207 RowNameStrings <- rownames(DataFrame), 208 maplist(atom_string, RowNames, RowNameStrings)