34
   35:- module(r_grammar,
   36	  [ r_tokens//1,		   37	    r_token//1,			   38	    r_identifier/1		   39	  ]).   40:- use_module(library(dcg/basics)).   41:- use_module(library(lists)).   42
   43
   48
   53
   54r_tokens([]) --> [].
   55r_tokens([H|T]) -->
   56	r_token(H),
   57	r_tokens(T).
   58
   62
   63r_identifier(Atom) :-
   64	atom(Atom),
   65	atom_codes(Atom, Codes),
   66	phrase(r_token(identifier(Atom)), Codes), !.
   67
   68
   95
   96r_token(Token) -->
   97	blanks,
   98	token(Token).
   99
  100token(Token) --> r_number(N),         !, {number_token(N, Token)}.
  101token(Token) --> r_string(S),         !, {Token = string(S)}.
  102token(Token) --> r_identifier(Id),    !, {identifier_token(Id, Token)}.
  103token(Token) --> r_infix(Id),         !, {Token = infix(Id)}.
  104token(Token) --> r_operator(Id),      !, {Token = op(Id)}.
  105token(Token) --> r_punct(Id),         !, {Token = punct(Id)}.
  106token(Token) --> r_comment(Id),       !, {Token = comment(Id)}.
  107
  108number_token(complex(I), complex(I)) :- !.
  109number_token(N, number(N)) :- !.
  110
  112
  113r_number(Number) -->
  114	r_basic_number(N),
  115	(   "L"
  116	->  { integer(N) -> Number = N; Number is integer(N) }
  117	;   "i"
  118	->  { Number = complex(N) }
  119	;   { Number = N }
  120	).
  121
  122r_basic_number(N) -->
  123	int_codes(I), !,
  124	(   dot,
  125	    digit(DF0),
  126	    digits(DF)
  127	->  {F = [0'., DF0|DF]}
  128	;   dot
  129	->  {F = `.0`}
  130	;   {F = []}
  131	),
  132	(   exp
  133	->  int_codes(DI),
  134	    {E=[0'e|DI]}
  135	;   {E = []}
  136	),
  137	{ append([I, F, E], Codes),
  138	  number_codes(N, Codes)
  139	}.
  140r_basic_number(N) -->
  141	dot, !,
  142	digit(DF0),
  143	digits(DF),
  144	{F = [0'., DF0|DF]},
  145	(   exp
  146	->  int_codes(DI),
  147	    {E=[0'e|DI]}
  148	;   {E = []}
  149	),
  150	{ append([`0`, F, E], Codes),
  151	  number_codes(N, Codes)
  152	}.
  153r_basic_number(N) --> "0x", !, xinteger(N).
  154r_basic_number(N) --> "0X", !, xinteger(N).
  155
  156
  157int_codes([C,D0|D]) -->
  158	sign(C), !,
  159	digit(D0),
  160	digits(D).
  161int_codes([D0|D]) -->
  162	digit(D0),
  163	digits(D).
  164
  165sign(0'-) --> "-".
  166sign(0'+) --> "+".
  167
  168dot --> ".".
  169
  170exp --> "e".
  171exp --> "E".
  172
  174
  175r_string(S) -->
  176	"\"",
  177	r_string_codes(C),
  178	"\"", !,
  179	{ string_codes(S, C) }.
  180r_string(S) -->
  181	"'",
  182	r_string_codes(C),
  183	"'", !,
  184	{ string_codes(S, C) }.
  185
  186
  187r_string_codes([]) --> [].
  188r_string_codes([H|T]) -->
  189	r_string_code(H),
  190	r_string_codes(T).
  191
  192r_string_code(H) --> "\\", !, r_escape(H).
  193r_string_code(H) --> [H].
  194
  195r_escape(H) --> [C], { r_escape(C, H) }, !.
  196r_escape(H) --> "x", !, xdigit(D1), xdigit(D2), {H is D1<<4 + D2}.
  197r_escape(H) --> "u", xdigits(4, H), !.
  198r_escape(H) --> "u{", xdigits(4, H), "}", !.
  199r_escape(H) --> "U", xdigits(8, H), !.
  200r_escape(H) --> "U{", xdigits(8, H), "}", !.
  201r_escape(H) --> digit(D1), {D1 =< 7}, !, odigits(2, D1, H).
  202
  203xdigits(N, V) --> xdigits(N, 0, V).
  204
  205xdigits(0, V, V) --> !.
  206xdigits(N, V0, V) -->
  207	xdigit(D),
  208	{ V1 is V0*16 + D,
  209	  N1 is N - 1
  210	},
  211	xdigits(N1, V1, V).
  212
  213odigits(0, V, V) --> !.
  214odigits(N, V0, V) -->
  215	digit(D), {D =< 7}, !,
  216	{ V1 is V0*8 + D,
  217	  N1 is N - 1
  218	},
  219	odigits(N1, V1, V).
  220odigits(_, V, V) --> [].
  221
  222r_escape(0'\', 0'\').
  223r_escape(0'\", 0'\").
  224r_escape(0'n, 0'\n).
  225r_escape(0'r, 0'\r).
  226r_escape(0't, 0'\t).
  227r_escape(0'b, 0'\b).
  228r_escape(0'a, 0'\a).
  229r_escape(0'f, 0'\f).
  230r_escape(0'v, 0'\v).
  231r_escape(0'\\, 0'\\).
  232
  237
  238r_identifier(Identifier) -->
  239	r_identifier_code(C0),
  240	\+ { no_identifier_start(C0) },
  241	r_identifier_codes(L),
  242	{ \+ ( C0 == 0'., L = [C1|_], code_type(C1, digit) ),
  243          atom_codes(Identifier, [C0|L])
  244	}.
  245
  246r_identifier_code(C) -->
  247	[C],
  248	{   code_type(C, alnum)
  249	->  true
  250	;   C == 0'_
  251	->  true
  252	;   C == 0'.
  253	}.
  254
  255r_identifier_codes([H|T]) -->
  256	r_identifier_code(H), !,
  257	r_identifier_codes(T).
  258r_identifier_codes([]) --> [].
  259
  260no_identifier_start(C) :- code_type(C, digit).
  261no_identifier_start(0'_).
  262
  264
  265identifier_token(Id, Token) :-
  266	(   reserved_identifier(Id, Token)
  267	->  true
  268	;   sub_atom(Id, 0, _, _, '..'),
  269	    sub_atom(Id, 2, _, 0, After),
  270	    (	After == '.'
  271	    ->	true
  272	    ;	atom_number(After, N),
  273		integer(N)
  274	    )
  275	->  Token = reserved(Id)
  276	;   Token = identifier(Id)
  277	).
  278
  279reserved_identifier('NULL',	     constant('NULL')).
  280reserved_identifier('NA',	     constant('NA')).
  281reserved_identifier('NA_integer_',   constant('NA_integer_')).
  282reserved_identifier('NA_real_',	     constant('NA_real_')).
  283reserved_identifier('NA_complex_',   constant('NA_complex_')).
  284reserved_identifier('NA_character_', constant('NA_character_')).
  285reserved_identifier('TRUE',	     logical(true)).
  286reserved_identifier('FALSE',	     logical(false)).
  287reserved_identifier(if,		     keyword(if)).
  288reserved_identifier(else,	     keyword(else)).
  289reserved_identifier(repeat,	     keyword(repeat)).
  290reserved_identifier(while,	     keyword(while)).
  291reserved_identifier(function,	     keyword(function)).
  292reserved_identifier(for,	     keyword(for)).
  293reserved_identifier(in,		     keyword(in)).
  294reserved_identifier(next,	     keyword(next)).
  295reserved_identifier(break,	     keyword(break)).
  296reserved_identifier('Inf',           number(Inf)) :- Inf is inf.
  297reserved_identifier('NaN',           number(NaN)) :- NaN is nan.
  298
  302
  303r_infix(Id) -->
  304	"%", non_blanks_short(Chars), "%", !,
  305	{ append([`%`, Chars, `%`], All),
  306	  atom_codes(Id, All)
  307	}.
  308
  309non_blanks_short([]) --> [].
  310non_blanks_short([H|T]) --> nonblank(H), non_blanks_short(T).
  311
  312
  315
  316r_operator(->)	 --> "->".
  317r_operator(+)	 --> "+".
  318r_operator(-)	 --> "-".
  319r_operator(*)	 --> "*".
  320r_operator(/)	 --> "/".
  321r_operator('%%') --> "%%".
  322r_operator(^)	 --> "^".
  323
  324r_operator(>=)	 --> ">=".
  325r_operator(>)	 --> ">".
  326r_operator(<=)	 --> "<=".
  327r_operator(<-)	 --> "<-".
  328r_operator(<)	 --> "<".
  329r_operator(==)	 --> "==".
  330r_operator('!=') --> "!=".
  331
  332r_operator(!)	 --> "!".
  333r_operator(&)	 --> "&".
  334r_operator('|')	 --> "|".
  335r_operator(~)	 --> "~".
  336r_operator($)	 --> "$".
  337r_operator(:)	 --> ":".
  338r_operator(=)	 --> "=".
  339
  340r_punct('(') --> "(".
  341r_punct(')') --> ")".
  342r_punct('{') --> "{".
  343r_punct('}') --> "}".
  344r_punct('[') --> "[".
  345r_punct(']') --> "]".
  346r_punct(',') --> ",".
  347
(String) -->
  349	"#", string(Codes), eol, !,
  350	{ string_codes(String, Codes) }.
  351
  352:- if(\+ current_predicate(eol//0)).  354eol --> "\r".
  355eol --> "\n".
  356eol --> eos.
  357:- endif.