34
35:- module(r_grammar,
36 [ r_tokens//1, 37 r_token//1, 38 r_identifier/1 39 ]). 40:- use_module(library(dcg/basics)). 41:- use_module(library(lists)).
54r_tokens([]) --> [].
55r_tokens([H|T]) -->
56 r_token(H),
57 r_tokens(T).
63r_identifier(Atom) :-
64 atom(Atom),
65 atom_codes(Atom, Codes),
66 phrase(r_token(identifier(Atom)), Codes), !.
96r_token(Token) -->
97 blanks,
98 token(Token).
99
100token(Token) --> r_number(N), !, {number_token(N, Token)}.
101token(Token) --> r_string(S), !, {Token = string(S)}.
102token(Token) --> r_identifier(Id), !, {identifier_token(Id, Token)}.
103token(Token) --> r_infix(Id), !, {Token = infix(Id)}.
104token(Token) --> r_operator(Id), !, {Token = op(Id)}.
105token(Token) --> r_punct(Id), !, {Token = punct(Id)}.
106token(Token) --> r_comment(Id), !, {Token = comment(Id)}.
107
108number_token(complex(I), complex(I)) :- !.
109number_token(N, number(N)) :- !.
113r_number(Number) -->
114 r_basic_number(N),
115 ( "L"
116 -> { integer(N) -> Number = N; Number is integer(N) }
117 ; "i"
118 -> { Number = complex(N) }
119 ; { Number = N }
120 ).
121
122r_basic_number(N) -->
123 int_codes(I), !,
124 ( dot,
125 digit(DF0),
126 digits(DF)
127 -> {F = [0'., DF0|DF]}
128 ; dot
129 -> {F = `.0`}
130 ; {F = []}
131 ),
132 ( exp
133 -> int_codes(DI),
134 {E=[0'e|DI]}
135 ; {E = []}
136 ),
137 { append([I, F, E], Codes),
138 number_codes(N, Codes)
139 }.
140r_basic_number(N) -->
141 dot, !,
142 digit(DF0),
143 digits(DF),
144 {F = [0'., DF0|DF]},
145 ( exp
146 -> int_codes(DI),
147 {E=[0'e|DI]}
148 ; {E = []}
149 ),
150 { append([`0`, F, E], Codes),
151 number_codes(N, Codes)
152 }.
153r_basic_number(N) --> "0x", !, xinteger(N).
154r_basic_number(N) --> "0X", !, xinteger(N).
155
156
157int_codes([C,D0|D]) -->
158 sign(C), !,
159 digit(D0),
160 digits(D).
161int_codes([D0|D]) -->
162 digit(D0),
163 digits(D).
164
165sign(0'-) --> "-".
166sign(0'+) --> "+".
167
168dot --> ".".
169
170exp --> "e".
171exp --> "E".
175r_string(S) -->
176 "\"",
177 r_string_codes(C),
178 "\"", !,
179 { string_codes(S, C) }.
180r_string(S) -->
181 "'",
182 r_string_codes(C),
183 "'", !,
184 { string_codes(S, C) }.
185
186
187r_string_codes([]) --> [].
188r_string_codes([H|T]) -->
189 r_string_code(H),
190 r_string_codes(T).
191
192r_string_code(H) --> "\\", !, r_escape(H).
193r_string_code(H) --> [H].
194
195r_escape(H) --> [C], { r_escape(C, H) }, !.
196r_escape(H) --> "x", !, xdigit(D1), xdigit(D2), {H is D1<<4 + D2}.
197r_escape(H) --> "u", xdigits(4, H), !.
198r_escape(H) --> "u{", xdigits(4, H), "}", !.
199r_escape(H) --> "U", xdigits(8, H), !.
200r_escape(H) --> "U{", xdigits(8, H), "}", !.
201r_escape(H) --> digit(D1), {D1 =< 7}, !, odigits(2, D1, H).
202
203xdigits(N, V) --> xdigits(N, 0, V).
204
205xdigits(0, V, V) --> !.
206xdigits(N, V0, V) -->
207 xdigit(D),
208 { V1 is V0*16 + D,
209 N1 is N - 1
210 },
211 xdigits(N1, V1, V).
212
213odigits(0, V, V) --> !.
214odigits(N, V0, V) -->
215 digit(D), {D =< 7}, !,
216 { V1 is V0*8 + D,
217 N1 is N - 1
218 },
219 odigits(N1, V1, V).
220odigits(_, V, V) --> [].
221
222r_escape(0'\', 0'\').
223r_escape(0'\", 0'\").
224r_escape(0'n, 0'\n).
225r_escape(0'r, 0'\r).
226r_escape(0't, 0'\t).
227r_escape(0'b, 0'\b).
228r_escape(0'a, 0'\a).
229r_escape(0'f, 0'\f).
230r_escape(0'v, 0'\v).
231r_escape(0'\\, 0'\\).
238r_identifier(Identifier) -->
239 r_identifier_code(C0),
240 \+ { no_identifier_start(C0) },
241 r_identifier_codes(L),
242 { \+ ( C0 == 0'., L = [C1|_], code_type(C1, digit) ),
243 atom_codes(Identifier, [C0|L])
244 }.
245
246r_identifier_code(C) -->
247 [C],
248 { code_type(C, alnum)
249 -> true
250 ; C == 0'_
251 -> true
252 ; C == 0'.
253 }.
254
255r_identifier_codes([H|T]) -->
256 r_identifier_code(H), !,
257 r_identifier_codes(T).
258r_identifier_codes([]) --> [].
259
260no_identifier_start(C) :- code_type(C, digit).
261no_identifier_start(0'_).
265identifier_token(Id, Token) :-
266 ( reserved_identifier(Id, Token)
267 -> true
268 ; sub_atom(Id, 0, _, _, '..'),
269 sub_atom(Id, 2, _, 0, After),
270 ( After == '.'
271 -> true
272 ; atom_number(After, N),
273 integer(N)
274 )
275 -> Token = reserved(Id)
276 ; Token = identifier(Id)
277 ).
278
279reserved_identifier('NULL', constant('NULL')).
280reserved_identifier('NA', constant('NA')).
281reserved_identifier('NA_integer_', constant('NA_integer_')).
282reserved_identifier('NA_real_', constant('NA_real_')).
283reserved_identifier('NA_complex_', constant('NA_complex_')).
284reserved_identifier('NA_character_', constant('NA_character_')).
285reserved_identifier('TRUE', logical(true)).
286reserved_identifier('FALSE', logical(false)).
287reserved_identifier(if, keyword(if)).
288reserved_identifier(else, keyword(else)).
289reserved_identifier(repeat, keyword(repeat)).
290reserved_identifier(while, keyword(while)).
291reserved_identifier(function, keyword(function)).
292reserved_identifier(for, keyword(for)).
293reserved_identifier(in, keyword(in)).
294reserved_identifier(next, keyword(next)).
295reserved_identifier(break, keyword(break)).
296reserved_identifier('Inf', number(Inf)) :- Inf is inf.
297reserved_identifier('NaN', number(NaN)) :- NaN is nan.
303r_infix(Id) -->
304 "%", non_blanks_short(Chars), "%", !,
305 { append([`%`, Chars, `%`], All),
306 atom_codes(Id, All)
307 }.
308
309non_blanks_short([]) --> [].
310non_blanks_short([H|T]) --> nonblank(H), non_blanks_short(T).
316r_operator(->) --> "->".
317r_operator(+) --> "+".
318r_operator(-) --> "-".
319r_operator(*) --> "*".
320r_operator(/) --> "/".
321r_operator('%%') --> "%%".
322r_operator(^) --> "^".
323
324r_operator(>=) --> ">=".
325r_operator(>) --> ">".
326r_operator(<=) --> "<=".
327r_operator(<-) --> "<-".
328r_operator(<) --> "<".
329r_operator(==) --> "==".
330r_operator('!=') --> "!=".
331
332r_operator(!) --> "!".
333r_operator(&) --> "&".
334r_operator('|') --> "|".
335r_operator(~) --> "~".
336r_operator($) --> "$".
337r_operator(:) --> ":".
338r_operator(=) --> "=".
339
340r_punct('(') --> "(".
341r_punct(')') --> ")".
342r_punct('{') --> "{".
343r_punct('}') --> "}".
344r_punct('[') --> "[".
345r_punct(']') --> "]".
346r_punct(',') --> ",".
347
(String) -->
349 "#", string(Codes), eol, !,
350 { string_codes(String, Codes) }.
351
352:- if(\+ current_predicate(eol//0)). 354eol --> "\r".
355eol --> "\n".
356eol --> eos.
357:- endif.
R parser primitives