34
35:- module(regex_compat,
36 [ regex_emacs_to_advanced/2,
37 regex_convert_file/1,
38 regex_main/0
39 ]). 40:- use_module(library(pce)). 41:- use_module(library(lists)). 42
87
95
96regex_main :-
97 current_prolog_flag(argv, Files),
98 maplist(regex_convert_file, Files),
99 halt.
107regex_convert_file(File) :-
108 format(user_error, 'Converting ~w ...', [File]),
109 new(V, view),
110 get(V, text_buffer, TB),
111 send(TB, insert_file, 0, File),
112 pce_open(TB, read, In),
113 read_term(In, Term, [subterm_positions(Pos)]),
114 phrase(convert_loop(Term, Pos, In, File), Substitutions),
115 close(In),
116 substitute(Substitutions, TB),
117 atom_concat(File, '.new', NewFile),
118 send(TB, save, NewFile),
119 send(V, destroy),
120 format(user_error, ' done~n', []).
121
122substitute(Substitutions, TB) :-
123 sort(Substitutions, S1),
124 reverse(S1, S2),
125 apply_substitutions(S2, TB).
126
127apply_substitutions([], _).
128apply_substitutions([H|T], TB) :-
129 apply_substitution(H, TB),
130 apply_substitutions(T, TB).
131
132apply_substitution(substitute(From, To, New), TB) :-
133 Len is To-From,
134 send(TB, delete, From, Len),
135 send(TB, insert, From, New).
136
137convert_loop(end_of_file, _, _, _) -->
138 !,
139 [].
140convert_loop(Term, Pos, In, Src) -->
141 convert_term(Term, Pos),
142 { fix_syntax(Term, Src),
143 read_term(In, T2, [subterm_positions(P2)]) },
145 convert_loop(T2, P2, In, Src).
146
147convert_term(Atom, F-T) -->
148 !,
149 ( { atom(Atom), looks_like_regex(Atom) }
150 -> { ( regex_emacs_to_advanced(Atom, ADV)
151 -> true
152 ; format(user_error, 'failed on ~q~n', [Atom]),
153 Q = Atom
154 ),
155 sformat(Q, '~q', [ADV])
156 },
157 [ substitute(F,T,Q) ]
158 ; []
159 ).
160convert_term(_, string_position(_,_)) -->
161 !,
162 [].
163convert_term({}(A), brace_term_position(_,_,AP)) -->
164 !,
165 convert_term(A, AP).
166convert_term(List, list_position(_,_,EPs,TP)) -->
167 !,
168 convert_term_list(List, EPs, TP).
169convert_term(regex(RE), term_position(_,_,_,_,[F-T])) -->
170 { atom(RE),
171 !,
172 ( regex_emacs_to_advanced(RE, ADV)
173 -> true
174 ; format(user_error, 'failed on ~q~n', [RE]),
175 Q = RE
176 ),
177 sformat(Q, '~q', [ADV])
178 },
179 [ substitute(F,T,Q) ].
180convert_term(Term, term_position(_,_,_,_,APs)) -->
181 { Term =.. [_|As] },
182 convert_term_list(As, APs, none).
183
184convert_term_list([A|AT], [P|PT], T) -->
185 !,
186 convert_term(A, P),
187 convert_term_list(AT, PT, T).
188convert_term_list(_, [], none) -->
189 !,
190 [].
191convert_term_list(A, [], P) -->
192 !,
193 convert_term(A, P).
194
195fix_syntax(Term, _) :-
196 requires_library(Term, Lib),
197 ensure_loaded(user:Lib),
198 fail.
199fix_syntax(Term, Src) :-
200 catch(expand_term(Term, Expanded), _, Expanded=Term),
201 process(Expanded, Src).
202
203process([], _) :- !.
204process([H|T], Src) :-
205 !,
206 process(H, Src),
207 process(T, Src).
208process(:- Directive, Src) :-
209 !,
210 directive(Directive, Src).
211process(_, _).
212
213directive(use_module(Modules), Src) :-
214 !,
215 modules(Modules, Src).
216directive(pce_expansion:push_compile_operators, _) :-
217 !,
218 '$set_source_module'(SM, SM),
219 pce_expansion:push_compile_operators(SM).
220directive(pce_expansion:pop_compile_operators, _) :-
221 !,
222 pce_expansion:pop_compile_operators.
223directive(op(P,A,N), _) :-
224 !,
225 '$set_source_module'(SM, SM),
226 op(P,A,SM:N).
227directive(_, _).
228
229modules([], _) :- !.
230modules([H|T], Src) :-
231 !,
232 modules(H, Src),
233 modules(T, Src).
234modules(Module, Src) :-
235 xref_public_list(Module, _Path, Public, Src),
236 '$set_source_module'(SM, SM),
237 forall(member(op(P,A,N), Public),
238 op(P,A,SM:N)).
239
240
241requires_library((:- emacs_begin_mode(_,_,_,_,_)), library(emacs_extend)).
242requires_library((:- draw_begin_shape(_,_,_,_)), library(pcedraw)).
248looks_like_regex(Atom) :-
249 re_pattern(Pattern),
250 sub_atom(Atom, _,_,_,Pattern),
251 !.
252
253re_pattern('\\(').
254re_pattern('\\|').
255re_pattern('\\s').
256re_pattern('\\S').
257re_pattern('\\w').
264regex_emacs_to_advanced(Emacs, Advanced) :-
265 atom_codes(Emacs, Codes),
266 nb_setval(syntax, emacs),
267 phrase(re_compile(Parsed), Codes),
268 !,
269 nb_setval(syntax, advanced),
270 phrase(re_compile(Parsed), AdvCodes),
271 !,
272 atom_codes(Advanced, AdvCodes).
273
274
275 278
279re_compile(or(B0, B1)) -->
280 branch(B0),
281 bar,
282 re_compile(B1).
283re_compile(B) -->
284 branch(B).
285
286bar -->
287 ( { nb_getval(syntax, emacs) }
288 -> "\\|"
289 ; "|"
290 ).
291
292
293openb -->
294 ( { nb_getval(syntax, emacs) }
295 -> "\\("
296 ; "("
297 ).
298
299
300closeb -->
301 ( { nb_getval(syntax, emacs) }
302 -> "\\)"
303 ; ")"
304 ).
305
306
307branch([P0|PT]) -->
308 piece(P0),
309 !,
310 branch(PT).
311branch([]) -->
312 [].
313
314piece(repeat(Min, Max, Atom)) -->
315 atom(Atom),
316 qualifier(Min, Max).
317piece(Atom) -->
318 atom(Atom).
319
320qualifier(0, 1) --> "?".
321qualifier(0, infinite) --> "*".
322qualifier(1, infinite) --> "+".
323qualifier(Min, Max) -->
324 { nb_getval(syntax, advanced) },
325 "{",
326 count(Min),
327 ( ","
328 -> ( "}"
329 -> { Max = infinite }
330 ; count(Max),
331 "}"
332 )
333 ; "}",
334 { Max = Min }
335 ).
336
337atom(char(Atom)) -->
338 char(Atom).
339atom(char_class(Atom)) -->
340 char_class(Atom),
341 !.
342atom(constraint(Constraint)) -->
343 constraint(Constraint).
344atom(backref(N)) -->
345 backref(N).
346atom(regex(Atom)) -->
347 openb,
348 !,
349 re_compile(Atom),
350 closeb.
351
352constraint(wordsep) -->
353 { nb_getval(syntax, emacs) },
354 "\\b".
355constraint(wordsep) -->
356 { nb_getval(syntax, advanced) },
357 "\\y".
358
359backref(N) -->
360 "\\",
361 digitval(N).
362
363char_class(Atom) -->
364 char_class_esc(Atom),
365 !.
366char_class(Atom) -->
367 char_class_expr(Atom),
368 !.
369char_class(Atom) -->
370 wild_card_esc(Atom).
371
372char_class_expr(Expr) -->
373 "[", !, char_group(Expr), "]".
374
375char_group(and(G0, not(G1))) -->
376 { nb_getval(syntax, advanced) },
377 pos_or_neg_char_group(G0), "-",
378 char_class_expr(G1).
379char_group(G) -->
380 pos_or_neg_char_group(G).
381
382pos_or_neg_char_group(not(Group)) -->
383 "^",
384 !,
385 pos_char_group(Group).
386pos_or_neg_char_group(Group) -->
387 pos_char_group(Group).
388
389pos_char_group(or(E1, G2)) -->
390 char_group_element(E1),
391 pos_char_group(G2).
392pos_char_group(Group) -->
393 char_group_element(Group).
394
395char_group_element(Range) -->
396 char_range(Range),
397 !.
398char_group_element(Class) -->
399 char_class_esc(Class).
400char_group_element(char_in(List)) --> 401 {ground(List)},
402 List.
403
404char(C) -->
405 [C],
406 \+ { special(C) },
407 !.
408char(C) -->
409 single_char_escape(C).
410
411char_range(Range) -->
412 se_range(Range).
413char_range(char(C)) -->
414 xml_char_inc_dash(C).
415
416se_range(range(From,To)) -->
417 char_or_esc(From), "-", char_or_esc(To),
418 !.
419
420char_or_esc(C) -->
421 xml_char(C),
422 !.
423char_or_esc(C) -->
424 single_char_escape(C).
425
426xml_char(C) -->
427 [C],
428 \+ {non_xml_char(C)}.
429xml_char_inc_dash(C) -->
430 [C],
431 \+ {non_xml_char_inc_dash(C)}.
432
433non_xml_char(0'-).
434non_xml_char(0'[).
435non_xml_char(0']).
436
437non_xml_char_inc_dash(0'[).
438non_xml_char_inc_dash(0']).
439
440char_class_esc(char(C)) -->
441 single_char_escape(C),
442 !.
443char_class_esc(C) -->
444 multi_char_esc(C),
445 !.
446
447single_char_escape(C) -->
448 "\\",
449 !,
450 escape_char(C).
451
452escape_char(0'\n) --> "n".
453escape_char(0'\r) --> "r".
454escape_char(0'\t) --> "t".
455escape_char(C) -->
456 [C],
457 { special(C) }.
458
459special(0'.).
460special(0'\\).
461special(0'?).
462special(0'*).
463special(0'+).
464special(0'{) :- nb_getval(syntax, advanced).
465special(0'}) :- nb_getval(syntax, advanced).
466special(0'() :- nb_getval(syntax, advanced).
467special(0')) :- nb_getval(syntax, advanced).
468special(0'[).
469special(0']).
470special(0'|) :- nb_getval(syntax, advanced).
471
472wild_card_esc(Set) -->
473 ".",
474 { Set = not(char_in("\n\r")) }.
475
476multi_char_esc(Set) -->
477 { nb_getval(syntax, advanced) },
478 ( wild_card_esc(Set)
479 ; adv_multi_char_esc(Set)
480 ).
481multi_char_esc(Set) -->
482 { nb_getval(syntax, emacs) },
483 ( wild_card_esc(Set)
484 ; emacs_multi_char_esc(Set)
485 ).
486
487adv_multi_char_esc(char_in(" \t\n\r")) --> "\\s".
488adv_multi_char_esc(not(char_in(" \t\n\r"))) --> "\\S".
489adv_multi_char_esc(or(letter, char_in("_:"))) --> "\\i".
490adv_multi_char_esc(not(or([letter, char_in("_:")]))) --> "\\I".
491adv_multi_char_esc(xml_name_char) --> "\\c".
492adv_multi_char_esc(not(xml_name_char)) --> "\\C".
493adv_multi_char_esc(decimal_digit) --> "\\d".
494adv_multi_char_esc(not(decimal_digit)) --> "\\D".
495adv_multi_char_esc(not(or(punctuation,separator,other))) --> "\\w".
496adv_multi_char_esc(or(punctuation,separator,other)) --> "\\W".
497adv_multi_char_esc(newline) --> "\n".
498adv_multi_char_esc(upper) --> "[[:upper:]]".
499adv_multi_char_esc(not(upper)) --> "[^[:upper:]]".
500adv_multi_char_esc(lower) --> "[[:lower:]]".
501adv_multi_char_esc(not(lower)) --> "[^[:lower:]]".
502adv_multi_char_esc(punct) --> "[[:punct:]]".
503adv_multi_char_esc(not(punct)) --> "[^[:punct:]]". 504
505emacs_multi_char_esc(char_in(" \t\n\r")) --> "\\s ".
506emacs_multi_char_esc(not(char_in(" \t\n\r"))) --> "\\S ".
507emacs_multi_char_esc(decimal_digit) --> "\\sd".
508emacs_multi_char_esc(not(decimal_digit)) --> "\\Sd".
509emacs_multi_char_esc(char_in("({[")) --> "\\s(".
510emacs_multi_char_esc(not(char_in("({["))) --> "\\S(".
511emacs_multi_char_esc(newline) --> "\\sn".
512emacs_multi_char_esc(upper) --> "\\su".
513emacs_multi_char_esc(not(upper)) --> "\\Su".
514emacs_multi_char_esc(lower) --> "\\sl".
515emacs_multi_char_esc(not(lower)) --> "\\Sl".
516emacs_multi_char_esc(punct) --> "\\s.".
517emacs_multi_char_esc(not(punct)) --> "\\S.".
518emacs_multi_char_esc(not(or(punctuation,separator,other))) --> "\\w".
519emacs_multi_char_esc(or(punctuation,separator,other)) --> "\\W".
520
521
522count(N) -->
523 digit(D0),
524 digits(D),
525 { catch(number_codes(N, [D0|D]), _, fail)
526 }.
527
528digit(D) -->
529 [D],
530 { code_type(D, digit) }.
531
532digitval(Val) -->
533 [D],
534 { code_type(D, digit(Val)) }.
535
536digits([D0|Ds]) -->
537 digit(D0),
538 !,
539 digits(Ds).
540digits([]) -->
541 []