1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2010-2013, University of Amsterdam 7 All rights reserved. 8 9 Redistribution and use in source and binary forms, with or without 10 modification, are permitted provided that the following conditions 11 are met: 12 13 1. Redistributions of source code must retain the above copyright 14 notice, this list of conditions and the following disclaimer. 15 16 2. Redistributions in binary form must reproduce the above copyright 17 notice, this list of conditions and the following disclaimer in 18 the documentation and/or other materials provided with the 19 distribution. 20 21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 POSSIBILITY OF SUCH DAMAGE. 33*/ 34 35:- module(rdf_parser, 36 [ xml_to_plrdf/3, % +XMLTerm, -RDFTerm, +State 37 element_to_plrdf/3, % +ContentList, -RDFTerm, +State 38 make_rdf_state/3, % +Options, -State, -RestOptions 39 rdf_modify_state/3, % +XMLAttrs, +State0, -State 40 rdf_name_space/1 41 ]). 42:- use_module(library(record),[(record)/1, op(_,_,record)]). 43:- use_module(library(rewrite_term), 44 [ rew_term_expansion/2,rew_goal_expansion/2, 45 op(_,_,::=) 46 ]). 47:- autoload(library(lists),[select/3,append/3,member/2]). 48:- autoload(library(occurs),[sub_term/2]). 49:- autoload(library(sgml),[xml_name/1]). 50:- autoload(library(uri),[iri_normalized/3]). 51 52 53% xml_name/1 54:- op(500, fx, \?). % Optional (attrs) 55 56term_expansion(F, T) :- rew_term_expansion(F, T). 57goal_expansion(F, T) :- rew_goal_expansion(F, T). 58 59goal_expansion(attrs(Attrs, List), Goal) :- 60 translate_attrs(List, Attrs, Goal). 61 62translate_attrs(Var, Attrs, rewrite_term(Var, Attrs)) :- 63 var(Var), 64 !. 65translate_attrs([], _, true) :- !. 66translate_attrs([H], Attrs, Goal) :- 67 !, 68 ( var(H) 69 -> Goal = rewrite_term(H, Attrs) 70 ; H = \?Optional 71 -> Goal = ( member(A, Attrs), 72 OptRewrite 73 -> true 74 ; true 75 ), 76 expand_goal(rewrite_term(\Optional, A), OptRewrite) 77 ; Goal = ( member(A, Attrs), 78 Rewrite 79 -> true 80 ), 81 expand_goal(rewrite_term(H, A), Rewrite) 82 ). 83translate_attrs([H|T], Attrs0, (G0, G1)) :- 84 !, 85 ( var(H) 86 -> G0 = rewrite_term(H, Attrs0), 87 Attrs1 = Attrs0 88 ; H = \?Optional 89 -> G0 = ( select(A, Attrs0, Attrs1), 90 OptRewrite 91 -> true 92 ; Attrs1 = Attrs0 93 ), 94 expand_goal(rewrite_term(\Optional, A), OptRewrite) 95 ; G0 = ( select(A, Attrs0, Attrs1), 96 Rewrite 97 ), 98 expand_goal(rewrite_term(H, A), Rewrite) 99 ), 100 translate_attrs(T, Attrs1, G1). 101translate_attrs(Rule, Attrs, Goal) :- 102 expand_goal(rewrite_term(Rule, Attrs), Goal). 103 104 105:- multifile rdf_name_space/1. 106:- dynamic rdf_name_space/1.
114rdf_name_space('http://www.w3.org/1999/02/22-rdf-syntax-ns#'). 115rdf_name_space('http://www.w3.org/TR/REC-rdf-syntax'). 116 117 118:- record 119 rdf_state(base_uri='', lang='', ignore_lang=false, convert_typed_literal).
132xml_to_plrdf(Element, RDF, State) :-
133 ( is_list(Element)
134 -> rewrite_term(\xml_content_objects(RDF, State), Element)
135 ; rewrite_term(\xml_objects(RDF, State), Element)
136 ).
142element_to_plrdf(Element, RDF, State) :- 143 rewrite_term(\nodeElementList(RDF, State), [Element]). 144 145xml_objects(Objects, Options0) ::= 146 E0, 147 { modify_state(E0, Options0, E, Options), !, 148 rewrite_term(\xml_objects(Objects, Options), E) 149 }. 150xml_objects(Objects, Options) ::= 151 element((\rdf('RDF'), !), 152 _, 153 \nodeElementList(Objects, Options)), 154 !. 155xml_objects(Objects, Options) ::= 156 element(_, _, \xml_content_objects(Objects, Options)). 157 158xml_content_objects([], _) ::= 159 []. 160xml_content_objects([H|T], Options) ::= 161 [ \xml_objects(H, Options) 162 | \xml_content_objects(T, Options) 163 ]. 164 165 166nodeElementList([], _Options) ::= 167 [], !. 168nodeElementList(L, Options) ::= 169 [ (\ws, !) 170 | \nodeElementList(L, Options) 171 ]. 172nodeElementList([H|T], Options) ::= 173 [ \nodeElementOrError(H, Options) 174 | \nodeElementList(T, Options) 175 ]. 176 177nodeElementOrError(H, Options) ::= 178 \nodeElement(H, Options), !. 179nodeElementOrError(unparsed(Data), _Options) ::= 180 Data. 181 182nodeElement(description(Type, About, Properties), Options) ::= 183 \description(Type, About, Properties, Options). 184 185 186 /******************************* 187 * DESCRIPTION * 188 *******************************/ 189 190description(Type, About, Properties, Options0) ::= 191 E0, 192 { modify_state(E0, Options0, E, Options), !, 193 rewrite_term(\description(Type, About, Properties, Options), E) 194 }. 195description(description, About, Properties, Options) ::= 196 element(\rdf('Description'), 197 \attrs([ \?idAboutAttr(About, Options) 198 | \propAttrs(PropAttrs, Options) 199 ]), 200 \propertyElts(PropElts, Options)), 201 { !, append(PropAttrs, PropElts, Properties) 202 }. 203description(Type, About, Properties, Options) ::= 204 element(\name_uri(Type, Options), 205 \attrs([ \?idAboutAttr(About, Options) 206 | \propAttrs(PropAttrs, Options) 207 ]), 208 \propertyElts(PropElts, Options)), 209 { append(PropAttrs, PropElts, Properties) 210 }. 211 212propAttrs([], _) ::= 213 [], !. 214propAttrs([H|T], Options) ::= 215 [ \propAttr(H, Options) 216 | \propAttrs(T, Options) 217 ]. 218 219propAttr(rdf:type = URI, Options) ::= 220 \rdf_or_unqualified(type) = \value_uri(URI, Options), !. 221propAttr(Name = Literal, Options) ::= 222 Name = Value, 223 { mkliteral(Value, Literal, Options) 224 }. 225 226propertyElts([], _) ::= 227 [], !. 228propertyElts(Elts, Options) ::= 229 [ (\ws, !) 230 | \propertyElts(Elts, Options) 231 ]. 232propertyElts([H|T], Options) ::= 233 [ \propertyElt(H, Options) 234 | \propertyElts(T, Options) 235 ]. 236 237propertyElt(E, Options) ::= 238 \propertyElt(Id, Name, Value, Options), 239 { mkprop(Name, Value, Prop), 240 ( var(Id) 241 -> E = Prop 242 ; E = id(Id, Prop) 243 ) 244 }. 245 246mkprop(NS:Local, Value, rdf:Local = Value) :- 247 rdf_name_space(NS), 248 !. 249mkprop(Name, Value, Name = Value). 250 251 252propertyElt(Id, Name, Value, Options0) ::= 253 E0, 254 { modify_state(E0, Options0, E, Options), !, 255 rewrite_term(\propertyElt(Id, Name, Value, Options), E) 256 }. 257propertyElt(Id, Name, Value, Options) ::= 258 \literalPropertyElt(Id, Name, Value, Options), !. 259propertyElt(_, Name, Literal, Options) ::= 260 element(Name, 261 \attrs([ \parseLiteral 262 ]), 263 Content), 264 { !, 265 literal_value(Content, Literal, Options) 266 }. 267propertyElt(Id, Name, collection(Elements), Options) ::= 268 element(Name, 269 \attrs([ \parseCollection, 270 \?idAttr(Id, Options) 271 ]), 272 \nodeElementList(Elements, Options)). 273 % 5.14 emptyPropertyElt 274propertyElt(Id, Name, Value, Options) ::= 275 element(Name, A, \all_ws), 276 { !, 277 rewrite_term(\emptyPropertyElt(Id, Value, Options), A) 278 }. 279 280propertyElt(_, Name, description(description, Id, Properties), Options) ::= 281 element(Name, 282 \attrs([ \parseResource, 283 \?idAboutAttr(Id, Options) 284 ]), 285 \propertyElts(Properties, Options)), 286 !. 287propertyElt(Id, Name, Literal, Options) ::= 288 element(Name, 289 \attrs([ \?idAttr(Id, Options) 290 ]), 291 [ Value ]), 292 { atom(Value), !, 293 mkliteral(Value, Literal, Options) 294 }. 295propertyElt(Id, Name, Value, Options) ::= 296 element(Name, 297 \attrs([ \?idAttr(Id, Options) 298 ]), 299 \an_rdf_object(Value, Options)), !. 300propertyElt(Id, Name, unparsed(Value), Options) ::= 301 element(Name, 302 \attrs([ \?idAttr(Id, Options) 303 ]), 304 Value). 305 306literalPropertyElt(Id, Name, Literal, Options) ::= 307 element(Name, 308 \attrs([ \typeAttr(Type, Options), 309 \?idAttr(Id, Options) 310 ]), 311 Content), 312 { typed_literal(Type, Content, Literal, Options) 313 }. 314 315emptyPropertyElt(Id, Literal, Options) ::= 316 \attrs([ \?idAttr(Id, Options), 317 \?parseLiteral 318 | \noMoreAttrs 319 ]), 320 { !, 321 mkliteral('', Literal, Options) 322 }. 323emptyPropertyElt(Id, 324 description(description, About, Properties), 325 Options) ::= 326 \attrs([ \?idAttr(Id, Options), 327 \?aboutResourceEmptyElt(About, Options), 328 \?parseResource 329 | \propAttrs(Properties, Options) 330 ]), !. 331 332aboutResourceEmptyElt(about(URI), Options) ::= 333 \resourceAttr(URI, Options), !. 334aboutResourceEmptyElt(node(URI), _Options) ::= \nodeIDAttr(URI).
346literal_value(Value, literal(type(rdf:'XMLLiteral', Value)), _).
lang(Lang)
option from Options.
353mkliteral(Text, literal(Val), Options) :-
354 atom(Text),
355 ( rdf_state_lang(Options, Lang),
356 Lang \== ''
357 -> Val = lang(Lang, Text)
358 ; Val = Text
359 ).
367typed_literal(Type, Content, literal(Object), Options) :- 368 rdf_state_convert_typed_literal(Options, Convert), 369 nonvar(Convert), 370 !, 371 ( catch(call(Convert, Type, Content, Object), E, true) 372 -> ( var(E) 373 -> true 374 ; Object = E 375 ) 376 ; Object = error(cannot_convert(Type, Content), _) 377 ). 378typed_literal(Type, [], literal(type(Type, '')), _Options) :- !. 379typed_literal(Type, [Text], literal(type(Type, Text)), _Options) :- !. 380typed_literal(Type, Content, literal(type(Type, Content)), _Options). 381 382 383idAboutAttr(id(Id), Options) ::= 384 \idAttr(Id, Options), !. 385idAboutAttr(about(About), Options) ::= 386 \aboutAttr(About, Options), !. 387idAboutAttr(node(About), _Options) ::= 388 \nodeIDAttr(About), !. 389 390%! an_rdf_object(-Object, +OptionsURI) 391% 392% Deals with an object, but there may be spaces around. I'm still 393% not sure where to deal with these. Best is to ask the XML parser 394% to get rid of them, So most likely this code will change if this 395% happens. 396 397an_rdf_object(Object, Options) ::= 398 [ \nodeElement(Object, Options) 399 ], !. 400an_rdf_object(Object, Options) ::= 401 [ (\ws, !) 402 | \an_rdf_object(Object, Options) 403 ]. 404an_rdf_object(Object, Options) ::= 405 [ \nodeElement(Object, Options), 406 \ws 407 ], !. 408 409ws ::= 410 A, 411 { atom(A), 412 atom_chars(A, Chars), 413 all_blank(Chars), ! 414 }. 415ws ::= 416 pi(_). 417 418all_ws ::= 419 [], !. 420all_ws ::= 421 [\ws | \all_ws]. 422 423all_blank([]). 424all_blank([H|T]) :- 425 char_type(H, space), % SWI-Prolog specific 426 all_blank(T). 427 428 429 /******************************* 430 * RDF ATTRIBUTES * 431 *******************************/ 432 433idAttr(Id, Options) ::= 434 \rdf_or_unqualified('ID') = \uniqueid(Id, Options). 435 436aboutAttr(About, Options) ::= 437 \rdf_or_unqualified(about) = \value_uri(About, Options). 438 439nodeIDAttr(About) ::= 440 \rdf_or_unqualified(nodeID) = About. 441 442resourceAttr(URI, Options) ::= 443 \rdf_or_unqualified(resource) = \value_uri(URI, Options). 444 445typeAttr(Type, Options) ::= 446 \rdf_or_unqualified(datatype) = \value_uri(Type, Options). 447 448name_uri(URI, Options) ::= 449 NS:Local, 450 { !, atom_concat(NS, Local, A), 451 rewrite_term(\value_uri(URI, Options), A) 452 }. 453name_uri(URI, Options) ::= 454 \value_uri(URI, Options). 455 456value_uri(URI, Options) ::= 457 A, 458 { rdf_state_base_uri(Options, Base), 459 ( Base \== [] 460 -> iri_normalized(A, Base, URI) 461 ; URI = A 462 ) 463 }. 464 465uniqueid(Id, Options) ::= 466 A, 467 { unique_xml_name(A, HashID), 468 make_globalid(HashID, Options, Id) 469 }. 470 471unique_xml_name(Name, HashID) :- 472 atom_concat(#, Name, HashID), 473 ( xml_name(Name) 474 -> true 475 ; print_message(warning, rdf(not_a_name(Name))) 476 ). 477 478make_globalid(In, Options, Id) :- 479 rdf_state_base_uri(Options, Base), 480 iri_normalized(In, Base, Id). 481 482parseLiteral ::= \rdf_or_unqualified(parseType) = 'Literal'. 483parseResource ::= \rdf_or_unqualified(parseType) = 'Resource'. 484parseCollection ::= \rdf_or_unqualified(parseType) = 'Collection'. 485 486 487 /******************************* 488 * PRIMITIVES * 489 *******************************/ 490 491rdf(Tag) ::= 492 NS:Tag, 493 { rdf_name_space(NS), ! 494 }. 495 496rdf_or_unqualified(Tag) ::= 497 Tag. 498rdf_or_unqualified(Tag) ::= 499 NS:Tag, 500 { rdf_name_space(NS), ! 501 }. 502 503 504 /******************************* 505 * BASICS * 506 *******************************/ 507 508/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 509This code is translated by the goal_expansion/2 rule at the start of 510this file. We leave the original code for reference. 511 512attrs(Bag) ::= 513 L0, 514 { do_attrs(Bag, L0) 515 }. 516 517do_attrs([], _) :- !. 518do_attrs([\?H|T], L0) :- !, % optional 519 ( select(X, L0, L), 520 rewrite_term(\H, X) 521 -> true 522 ; L = L0 523 ), 524 do_attrs(T, L). 525do_attrs([H|T], L0) :- 526 select(X, L0, L), 527 rewrite_term(H, X), !, 528 do_attrs(T, L). 529do_attrs(C, L) :- 530 rewrite_term(C, L). 531- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 532 533% \noMoreAttrs 534% 535% Check attribute-list is empty. Reserved xml: attributes are 536% excluded from this test. 537 538noMoreAttrs ::= 539 [], !. 540noMoreAttrs ::= [ xml:_=_ | \noMoreAttrs ].
base_uri(_)
in the Options
It Element0 contains xml:lang = Lang, strip it from the
attributes list and update lang(_)
in the Options
Remove all xmlns=_, xmlns:_=_ and xml:_=_. Only succeed if something changed.
556modify_state(element(Name, Attrs0, Content), Options0, 557 element(Name, Attrs, Content), Options) :- 558 modify_a_state(Attrs0, Options0, Attrs, Options), 559 Attrs0 \== Attrs. 560 561rdf_modify_state(Attributes, State0, State) :- 562 modify_a_state(Attributes, State0, _, State). 563 564 565modify_a_state([], Options, [], Options). 566modify_a_state([Name=Value|T0], Options0, T, Options) :- 567 modify_a(Name, Value, Options0, Options1), 568 !, 569 modify_a_state(T0, Options1, T, Options). 570modify_a_state([H|T0], Options0, [H|T], Options) :- 571 modify_a_state(T0, Options0, T, Options). 572 573 574modify_a(xml:base, Base1, Options0, Options) :- 575 !, 576 rdf_state_base_uri(Options0, Base0), 577 remove_fragment(Base1, Base2), 578 iri_normalized(Base2, Base0, Base), 579 set_base_uri_of_rdf_state(Base, Options0, Options). 580modify_a(xml:lang, Lang, Options0, Options) :- 581 !, 582 rdf_state_ignore_lang(Options0, false), 583 !, 584 set_lang_of_rdf_state(Lang, Options0, Options). 585modify_a(xmlns, _, Options, Options). 586modify_a(xmlns:_, _, Options, Options). 587modify_a(xml:_, _, Options, Options).
594remove_fragment(URI, Plain) :- 595 sub_atom(URI, B, _, _, #), 596 !, 597 sub_atom(URI, 0, B, _, Plain). 598remove_fragment(URI, URI). 599 600 601 /******************************* 602 * HELP PCE-EMACS A BIT * 603 *******************************/ 604 605:- multifile 606 emacs_prolog_colours:term_colours/2, 607 emacs_prolog_colours:goal_classification/2. 608 609expand(c(X), _, X) :- !. 610expand(In, Pattern, Colours) :- 611 compound(In), 612 !, 613 In =.. [F|Args], 614 expand_list(Args, PatternArgs, ColourArgs), 615 Pattern =.. [F|PatternArgs], 616 Colours = functor(F) - ColourArgs. 617expand(X, X, classify). 618 619expand_list([], [], []). 620expand_list([H|T], [PH|PT], [CH|CT]) :- 621 expand(H, PH, CH), 622 expand_list(T, PT, CT). 623 624:- discontiguous 625 term_expansion/2. 626 627term_expansion(term_colours(C), 628 emacs_prolog_colours:term_colours(Pattern, Colours)) :- 629 expand(C, Pattern, Colours). 630 631term_colours((c(head(+(1))) ::= c(match), {c(body)})). 632term_colours((c(head(+(1))) ::= c(match))). 633 634emacs_prolog_colours:goal_classification(\_, expanded). 635 636:- dynamic 637 prolog:meta_goal/2. 638:- multifile 639 prolog:meta_goal/2, 640 prolog:called_by/2. 641 642prologmeta_goal(rewrite_term(A, _), [A]). 643prologmeta_goal(\A, [A+1]). 644 645prologcalled_by(attrs(Attrs, _Term), Called) :- 646 findall(G+1, sub_term(\?G, Attrs), Called, Tail), 647 findall(G+1, sub_term(\G, Attrs), Tail)