View source with raw comments or as raw
    1/*  Part of XPCE --- The SWI-Prolog GUI toolkit
    2
    3    Author:        Jan Wielemaker and Anjo Anjewierden
    4    E-mail:        jan@swi.psy.uva.nl
    5    WWW:           http://www.swi.psy.uva.nl/projects/xpce/
    6    Copyright (c)  2005-2013, University of Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(regex_compat,
   36          [ regex_emacs_to_advanced/2,
   37            regex_convert_file/1,
   38            regex_main/0
   39          ]).   40:- use_module(library(pce)).   41:- use_module(library(lists)).   42
   43/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   44This file deals with translating  old-style   (<  6.5.12)  EMACS regular
   45expressions into new style ones. Typically, a file is converted using
   46
   47        ?- regex_convert_file(+File)
   48
   49Creating a file <File>.new after  the   conversion.  Check the file, for
   50example using mgdiff or another (visual)   diff  tool before overwriting
   51the old file!
   52
   53Differences
   54===========
   55
   56Translation of Emacs regex to new advanced regex:
   57
   58        \(..\|..\)      -->     (..|..)
   59        (               -->     \(
   60        |               -->     \|
   61        )               -->     \)
   62        {               -->     \{
   63        }               -->     \}
   64        \s<space>       -->     \s
   65        \S<space>       -->     \S
   66        \sd             -->     \d
   67        \Sd             -->     \D
   68        \sn             -->     \n              (newline)
   69        \su             -->     [[:upper:]]
   70        \sl             -->     [[:lower:]]
   71        \s.             -->     [[:punct:]]
   72        \S.             -->     [^[:punct:]]
   73        [\]             -->     [\\]            (Bracket expressions honour \)
   74
   75About this module
   76=================
   77
   78This module is  a  rather  crude   implementation  for  an  approach  of
   79converting Prolog source code using the Prolog parser. It loads the file
   80into an XPCE text_buffer, reads the  file using position information and
   81records the change-requests. In a second stage   it makes the changes to
   82the text_buffer and saves the result.
   83
   84Notably adapting the current  syntax   (operators  etc.)  is incomplete,
   85which can cause this module to raise an exception.
   86- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
   87
   88%       regex_main
   89%
   90%       By adding the  following  line  to   the  start  of  this  file,
   91%       adjusting the path, you turn this file into a PrologScript (Unix
   92%       only).
   93%
   94%       #!/usr/bin/pl -q -g regex_main -s
   95
   96regex_main :-
   97    current_prolog_flag(argv, Files),
   98    maplist(regex_convert_file, Files),
   99    halt.
 regex_convert_file(+File)
Convert a single file, creating <File>.new on success. It is adviced the inspect the changes before moving the .new to the original file.
  107regex_convert_file(File) :-
  108    format(user_error, 'Converting ~w ...', [File]),
  109    new(V, view),
  110    get(V, text_buffer, TB),
  111    send(TB, insert_file, 0, File),
  112    pce_open(TB, read, In),
  113    read_term(In, Term, [subterm_positions(Pos)]),
  114    phrase(convert_loop(Term, Pos, In, File), Substitutions),
  115    close(In),
  116    substitute(Substitutions, TB),
  117    atom_concat(File, '.new', NewFile),
  118    send(TB, save, NewFile),
  119    send(V, destroy),
  120    format(user_error, ' done~n', []).
  121
  122substitute(Substitutions, TB) :-
  123    sort(Substitutions, S1),
  124    reverse(S1, S2),
  125    apply_substitutions(S2, TB).
  126
  127apply_substitutions([], _).
  128apply_substitutions([H|T], TB) :-
  129    apply_substitution(H, TB),
  130    apply_substitutions(T, TB).
  131
  132apply_substitution(substitute(From, To, New), TB) :-
  133    Len is To-From,
  134    send(TB, delete, From, Len),
  135    send(TB, insert, From, New).
  136
  137convert_loop(end_of_file, _, _, _) -->
  138    !,
  139    [].
  140convert_loop(Term, Pos, In, Src) -->
  141    convert_term(Term, Pos),
  142    { fix_syntax(Term, Src),
  143      read_term(In, T2, [subterm_positions(P2)]) },
  144%   { portray_clause(T2) },
  145    convert_loop(T2, P2, In, Src).
  146
  147convert_term(Atom, F-T) -->
  148    !,
  149    (   { atom(Atom), looks_like_regex(Atom) }
  150    ->  { (   regex_emacs_to_advanced(Atom, ADV)
  151          ->  true
  152          ;   format(user_error, 'failed on ~q~n', [Atom]),
  153              Q = Atom
  154          ),
  155          sformat(Q, '~q', [ADV])
  156        },
  157        [ substitute(F,T,Q) ]
  158    ;   []
  159    ).
  160convert_term(_, string_position(_,_)) -->
  161    !,
  162    [].
  163convert_term({}(A), brace_term_position(_,_,AP)) -->
  164    !,
  165    convert_term(A, AP).
  166convert_term(List, list_position(_,_,EPs,TP)) -->
  167    !,
  168    convert_term_list(List, EPs, TP).
  169convert_term(regex(RE), term_position(_,_,_,_,[F-T])) -->
  170    { atom(RE),
  171      !,
  172      (   regex_emacs_to_advanced(RE, ADV)
  173      ->  true
  174      ;   format(user_error, 'failed on ~q~n', [RE]),
  175          Q = RE
  176      ),
  177      sformat(Q, '~q', [ADV])
  178    },
  179    [ substitute(F,T,Q) ].
  180convert_term(Term, term_position(_,_,_,_,APs)) -->
  181    { Term =.. [_|As] },
  182    convert_term_list(As, APs, none).
  183
  184convert_term_list([A|AT], [P|PT], T) -->
  185    !,
  186    convert_term(A, P),
  187    convert_term_list(AT, PT, T).
  188convert_term_list(_, [], none) -->
  189    !,
  190    [].
  191convert_term_list(A, [], P) -->
  192    !,
  193    convert_term(A, P).
  194
  195fix_syntax(Term, _) :-
  196    requires_library(Term, Lib),
  197    ensure_loaded(user:Lib),
  198    fail.
  199fix_syntax(Term, Src) :-
  200    catch(expand_term(Term, Expanded), _, Expanded=Term),
  201    process(Expanded, Src).
  202
  203process([], _) :- !.
  204process([H|T], Src) :-
  205    !,
  206    process(H, Src),
  207    process(T, Src).
  208process(:- Directive, Src) :-
  209    !,
  210    directive(Directive, Src).
  211process(_, _).
  212
  213directive(use_module(Modules), Src) :-
  214    !,
  215    modules(Modules, Src).
  216directive(pce_expansion:push_compile_operators, _) :-
  217    !,
  218    '$set_source_module'(SM, SM),
  219    pce_expansion:push_compile_operators(SM).
  220directive(pce_expansion:pop_compile_operators, _) :-
  221    !,
  222    pce_expansion:pop_compile_operators.
  223directive(op(P,A,N), _) :-
  224    !,
  225    '$set_source_module'(SM, SM),
  226    op(P,A,SM:N).
  227directive(_, _).
  228
  229modules([], _) :- !.
  230modules([H|T], Src) :-
  231    !,
  232    modules(H, Src),
  233    modules(T, Src).
  234modules(Module, Src) :-
  235    xref_public_list(Module, _Path, Public, Src),
  236    '$set_source_module'(SM, SM),
  237    forall(member(op(P,A,N), Public),
  238           op(P,A,SM:N)).
  239
  240
  241requires_library((:- emacs_begin_mode(_,_,_,_,_)), library(emacs_extend)).
  242requires_library((:- draw_begin_shape(_,_,_,_)), library(pcedraw)).
 looks_like_regex(+Atom)
Succeeds if we think Atom is a regumar expression
  248looks_like_regex(Atom) :-
  249    re_pattern(Pattern),
  250    sub_atom(Atom, _,_,_,Pattern),
  251    !.
  252
  253re_pattern('\\(').
  254re_pattern('\\|').
  255re_pattern('\\s').
  256re_pattern('\\S').
  257re_pattern('\\w').
 regex_emacs_to_advanced(+Old, -New)
Convert a single regular expression.
  264regex_emacs_to_advanced(Emacs, Advanced) :-
  265    atom_codes(Emacs, Codes),
  266    nb_setval(syntax, emacs),
  267    phrase(re_compile(Parsed), Codes),
  268    !,
  269    nb_setval(syntax, advanced),
  270    phrase(re_compile(Parsed), AdvCodes),
  271    !,
  272    atom_codes(Advanced, AdvCodes).
  273
  274
  275                 /*******************************
  276                 *           REGEX DCG          *
  277                 *******************************/
  278
  279re_compile(or(B0, B1)) -->
  280    branch(B0),
  281    bar,
  282    re_compile(B1).
  283re_compile(B) -->
  284    branch(B).
  285
  286bar -->
  287    (   { nb_getval(syntax, emacs) }
  288    ->  "\\|"
  289    ;   "|"
  290    ).
  291
  292
  293openb -->
  294    (   { nb_getval(syntax, emacs) }
  295    ->  "\\("
  296    ;   "("
  297    ).
  298
  299
  300closeb -->
  301    (   { nb_getval(syntax, emacs) }
  302    ->  "\\)"
  303    ;   ")"
  304    ).
  305
  306
  307branch([P0|PT]) -->
  308    piece(P0),
  309    !,
  310    branch(PT).
  311branch([]) -->
  312    [].
  313
  314piece(repeat(Min, Max, Atom)) -->
  315    atom(Atom),
  316    qualifier(Min, Max).
  317piece(Atom) -->
  318    atom(Atom).
  319
  320qualifier(0, 1) --> "?".
  321qualifier(0, infinite) --> "*".
  322qualifier(1, infinite) --> "+".
  323qualifier(Min, Max) -->
  324    { nb_getval(syntax, advanced) },
  325    "{",
  326    count(Min),
  327    (   ","
  328    ->  (   "}"
  329        ->  { Max = infinite }
  330        ;   count(Max),
  331            "}"
  332        )
  333    ;   "}",
  334        { Max = Min }
  335    ).
  336
  337atom(char(Atom)) -->
  338    char(Atom).
  339atom(char_class(Atom)) -->
  340    char_class(Atom),
  341    !.
  342atom(constraint(Constraint)) -->
  343    constraint(Constraint).
  344atom(backref(N)) -->
  345    backref(N).
  346atom(regex(Atom)) -->
  347    openb,
  348    !,
  349    re_compile(Atom),
  350    closeb.
  351
  352constraint(wordsep) -->
  353    { nb_getval(syntax, emacs) },
  354    "\\b".
  355constraint(wordsep) -->
  356    { nb_getval(syntax, advanced) },
  357    "\\y".
  358
  359backref(N) -->
  360    "\\",
  361    digitval(N).
  362
  363char_class(Atom) -->
  364    char_class_esc(Atom),
  365    !.
  366char_class(Atom) -->
  367    char_class_expr(Atom),
  368    !.
  369char_class(Atom) -->
  370    wild_card_esc(Atom).
  371
  372char_class_expr(Expr) -->
  373    "[", !, char_group(Expr), "]".
  374
  375char_group(and(G0, not(G1))) -->
  376    { nb_getval(syntax, advanced) },
  377    pos_or_neg_char_group(G0), "-",
  378    char_class_expr(G1).
  379char_group(G) -->
  380    pos_or_neg_char_group(G).
  381
  382pos_or_neg_char_group(not(Group)) -->
  383    "^",
  384    !,
  385    pos_char_group(Group).
  386pos_or_neg_char_group(Group) -->
  387    pos_char_group(Group).
  388
  389pos_char_group(or(E1, G2)) -->
  390    char_group_element(E1),
  391    pos_char_group(G2).
  392pos_char_group(Group) -->
  393    char_group_element(Group).
  394
  395char_group_element(Range) -->
  396    char_range(Range),
  397    !.
  398char_group_element(Class) -->
  399    char_class_esc(Class).
  400char_group_element(char_in(List)) -->   % TBD: escapes for ] and ^
  401    {ground(List)},
  402    List.
  403
  404char(C) -->
  405    [C],
  406    \+ { special(C) },
  407    !.
  408char(C) -->
  409    single_char_escape(C).
  410
  411char_range(Range) -->
  412    se_range(Range).
  413char_range(char(C)) -->
  414    xml_char_inc_dash(C).
  415
  416se_range(range(From,To)) -->
  417    char_or_esc(From), "-", char_or_esc(To),
  418    !.
  419
  420char_or_esc(C) -->
  421    xml_char(C),
  422    !.
  423char_or_esc(C) -->
  424    single_char_escape(C).
  425
  426xml_char(C) -->
  427    [C],
  428    \+ {non_xml_char(C)}.
  429xml_char_inc_dash(C) -->
  430    [C],
  431    \+ {non_xml_char_inc_dash(C)}.
  432
  433non_xml_char(0'-).
  434non_xml_char(0'[).
  435non_xml_char(0']).
  436
  437non_xml_char_inc_dash(0'[).
  438non_xml_char_inc_dash(0']).
  439
  440char_class_esc(char(C)) -->
  441    single_char_escape(C),
  442    !.
  443char_class_esc(C) -->
  444    multi_char_esc(C),
  445    !.
  446
  447single_char_escape(C) -->
  448    "\\",
  449    !,
  450    escape_char(C).
  451
  452escape_char(0'\n) --> "n".
  453escape_char(0'\r) --> "r".
  454escape_char(0'\t) --> "t".
  455escape_char(C) -->
  456    [C],
  457    { special(C) }.
  458
  459special(0'.).
  460special(0'\\).
  461special(0'?).
  462special(0'*).
  463special(0'+).
  464special(0'{) :- nb_getval(syntax, advanced).
  465special(0'}) :- nb_getval(syntax, advanced).
  466special(0'() :- nb_getval(syntax, advanced).
  467special(0')) :- nb_getval(syntax, advanced).
  468special(0'[).
  469special(0']).
  470special(0'|) :- nb_getval(syntax, advanced).
  471
  472wild_card_esc(Set) -->
  473    ".",
  474    { Set = not(char_in("\n\r")) }.
  475
  476multi_char_esc(Set) -->
  477    { nb_getval(syntax, advanced) },
  478    (   wild_card_esc(Set)
  479    ;   adv_multi_char_esc(Set)
  480    ).
  481multi_char_esc(Set) -->
  482    { nb_getval(syntax, emacs) },
  483    (   wild_card_esc(Set)
  484    ;   emacs_multi_char_esc(Set)
  485    ).
  486
  487adv_multi_char_esc(char_in(" \t\n\r")) --> "\\s".
  488adv_multi_char_esc(not(char_in(" \t\n\r"))) --> "\\S".
  489adv_multi_char_esc(or(letter, char_in("_:"))) --> "\\i".
  490adv_multi_char_esc(not(or([letter, char_in("_:")]))) --> "\\I".
  491adv_multi_char_esc(xml_name_char) --> "\\c".
  492adv_multi_char_esc(not(xml_name_char)) --> "\\C".
  493adv_multi_char_esc(decimal_digit) --> "\\d".
  494adv_multi_char_esc(not(decimal_digit)) --> "\\D".
  495adv_multi_char_esc(not(or(punctuation,separator,other))) --> "\\w".
  496adv_multi_char_esc(or(punctuation,separator,other)) --> "\\W".
  497adv_multi_char_esc(newline) --> "\n".
  498adv_multi_char_esc(upper) --> "[[:upper:]]".
  499adv_multi_char_esc(not(upper)) --> "[^[:upper:]]".
  500adv_multi_char_esc(lower) --> "[[:lower:]]".
  501adv_multi_char_esc(not(lower)) --> "[^[:lower:]]".
  502adv_multi_char_esc(punct) --> "[[:punct:]]".
  503adv_multi_char_esc(not(punct)) --> "[^[:punct:]]". % ??
  504
  505emacs_multi_char_esc(char_in(" \t\n\r")) --> "\\s ".
  506emacs_multi_char_esc(not(char_in(" \t\n\r"))) --> "\\S ".
  507emacs_multi_char_esc(decimal_digit) --> "\\sd".
  508emacs_multi_char_esc(not(decimal_digit)) --> "\\Sd".
  509emacs_multi_char_esc(char_in("({[")) --> "\\s(".
  510emacs_multi_char_esc(not(char_in("({["))) --> "\\S(".
  511emacs_multi_char_esc(newline) --> "\\sn".
  512emacs_multi_char_esc(upper) --> "\\su".
  513emacs_multi_char_esc(not(upper)) --> "\\Su".
  514emacs_multi_char_esc(lower) --> "\\sl".
  515emacs_multi_char_esc(not(lower)) --> "\\Sl".
  516emacs_multi_char_esc(punct) --> "\\s.".
  517emacs_multi_char_esc(not(punct)) --> "\\S.".
  518emacs_multi_char_esc(not(or(punctuation,separator,other))) --> "\\w".
  519emacs_multi_char_esc(or(punctuation,separator,other)) --> "\\W".
  520
  521
  522count(N) -->
  523    digit(D0),
  524    digits(D),
  525    { catch(number_codes(N, [D0|D]), _, fail)
  526    }.
  527
  528digit(D) -->
  529    [D],
  530    { code_type(D, digit) }.
  531
  532digitval(Val) -->
  533    [D],
  534    { code_type(D, digit(Val)) }.
  535
  536digits([D0|Ds]) -->
  537    digit(D0),
  538    !,
  539    digits(Ds).
  540digits([]) -->
  541    []