34
35:- module(es_swish,
36 [ es_create_index/0,
37 es_add_file/1, 38 es_add/2, 39 es_add_since/1, 40 es_query/2
41 ]). 42:- use_module(elastic). 43:- use_module('../storage'). 44:- use_module('../authenticate'). 45:- use_module(library(base64)). 46:- use_module(library(broadcast)). 47:- use_module(library(solution_sequences)). 48:- use_module(library(apply)). 49:- use_module(library(pprint)). 50:- use_module(library(debug)). 51:- use_module(library(lists)). 52:- use_module(library(http/http_server)).
65es_create_index :-
66 commit_properties(Properties),
67 es_create_index(swish, Properties).
74es_add_since(Time) :-
75 get_time(Now),
76 Since is Now - Time,
77 forall(call_nth((storage_meta_data(File, Meta),
78 Meta.time > Since),
79 Nth),
80 es_add_file(File, 0, Nth)).
90es_add(Offset, Limit) :-
91 forall(call_nth(limit(Limit, offset(Offset, storage_file(File))), Nth),
92 es_add_file(File, Offset, Nth)).
93
94es_add_file(File, Offset, N) :-
95 Nth is Offset+N,
96 format(user_error, '\r~D ~w ... \e[K', [Nth, File]),
97 Error = error(_,_),
98 catch(es_add_file(File), Error,
99 print_message(warning, Error)).
108es_add_file(File) :-
109 storage_file(File, Data, Meta),
110 !,
111 file_name_extension(_Base, Ext, File),
112 atom_concat('p/', File, IdPath),
113 base64url(IdPath, Id0),
114 str_limit(Id0, 511, Id), 115 es_add(swish, Meta.put(_{ content: Data,
116 type:Ext
117 }), _Reply,
118 [ id(Id)
119 ]).
120es_add_file(File) :-
121 print_message(warning, gitty(no_file(File))).
122
123str_limit(Id, Limit, Id) :-
124 string_length(Id, Len),
125 Len =< Limit,
126 !.
127str_limit(Id0, Limit, Id) :-
128 sub_string(Id0, 0, Limit, _, Id).
134es_query(For, Result) :-
135 es_search(swish, For, Result).
136
137commit_properties(#{ name: #{type: keyword}, 138
139 author: #{type: keyword}, 140 avatar: #{type: keyword},
141 commit: #{type: keyword},
142 identity: #{type: keyword},
143 email: #{type: keyword},
144 profile_id: #{type: keyword},
145 peer: #{type: ip, ignore_malformed:true},
146
147 title: #{type: text}, 148 description: #{type: text},
149 commit_message: #{type: text},
150 type: #{type: keyword},
151 tags: #{type: keyword},
152 example: #{type: boolean},
153
154 data: #{type: keyword}, 155 previous: #{type: keyword},
156 content: #{type: text},
157 time: #{ type: date,
158 format:"epoch_second"
159 },
160 161 modify: #{type: keyword}, 162 public: #{type: boolean}
163 }).
164
165
166 169
170:- multifile
171 web_storage:search_sources_hook/2.
177web_storage:search_sources_hook(Query, Result) :-
178 ( catch_with_backtrace(es_search_sources_hook(Query, Result), E, true)
179 -> ( var(E)
180 -> true
181 ; message_to_string(E, Msg),
182 Result = #{ error: Msg },
183 ( debugging(elastic)
184 -> print_term(Result, [nl(true)])
185 ; true
186 )
187 )
188 ; Result = #{ error: "failed" }
189 ).
190
191es_search_sources_hook(Query, Result) :-
192 ( debugging(elastic)
193 -> print_term(Query, [nl(true)])
194 ; true
195 ),
196 partition(on_content, Query.query, OnContent, OnMeta),
197 phrase(es_filter(Query, OnMeta), Filter),
198 phrase(es_content(OnContent), Match),
199 es_ordering(OnContent, OnMeta, Query, Field, Dir),
200 dict_pairs(OrderBy, #, [Field - #{order:Dir}]),
201 ESQuery = #{query:
202 #{bool:
203 #{ must: Match,
204 filter: Filter
205 }
206 },
207 '_source': false, 208 fields:
209 [ name, 210 time,
211 author,
212 avatar,
213 tags
214 ],
215 track_total_hits: true, 216 highlight:
217 #{ fields:
218 #{ content: #{}}
219 },
220 sort: [
221 OrderBy
222 ],
223 from: Query.get(offset, 0),
224 size: Query.get(limit, 10)
225 },
226 ( debugging(elastic)
227 -> print_term(ESQuery, [nl(true)])
228 ; true
229 ),
230 es_query(ESQuery, Matches),
231 es_to_swish(Matches, Result).
232
233on_content(word(_)).
234on_content(string(_)).
235on_content(regex(_,_)).
241es_content([]) -->
242 [].
243es_content([H|T]) -->
244 es_content_1(H),
245 es_content(T).
246
247es_content_1(word(W)) -->
248 [ #{match: #{content: W}} ].
249es_content_1(string(W)) -->
250 [ #{match_phrase: #{content: W}} ].
251es_content_1(regex(RE,Flags)) -->
252 field_filter(regex(RE,Flags), content).
258es_filter(Query, OnMeta) -->
259 es_owner_filter(Query, OnMeta),
260 es_tag_filter(OnMeta),
261 es_type_filter(OnMeta),
262 es_name_filter(OnMeta).
263
264es_owner_filter(Query, OnMeta) --> 265 { memberchk(user("me"), OnMeta),
266 user_property(Query.auth, identity(Id))
267 },
268 !,
269 [ #{term: #{identity: Id}} ].
270es_owner_filter(Query, OnMeta) --> 271 { memberchk(user("me"), OnMeta),
272 NickName = Query.auth.get(display_name)
273 },
274 !,
275 field_filter(NickName, author),
276 public_filter.
277es_owner_filter(Query, OnMeta) --> 278 { memberchk(user("me"), OnMeta),
279 user_property(Query.auth, avatar(Avatar))
280 },
281 !,
282 [ #{term: #{avatar: Avatar}} ],
283 public_filter.
284es_owner_filter(_Query, OnMeta) -->
285 { memberchk(user(User), OnMeta),
286 User \== "me"
287 },
288 field_filter(User, author),
289 public_filter.
290es_owner_filter(_, _) -->
291 public_filter.
292
293public_filter -->
294 [ #{term: #{public: true}} ].
295
296es_tag_filter(Query) -->
297 { memberchk(tag(Tag), Query) },
298 !,
299 field_filter(Tag, tags).
300es_tag_filter(_) -->
301 [].
302
303es_name_filter(Query) -->
304 { memberchk(name(Name), Query) },
305 !,
306 field_filter(Name, tags).
307es_name_filter(_) -->
308 [].
309
310es_type_filter(Query) -->
311 { memberchk(type(Type), Query) },
312 !,
313 [ #{term: #{type: Type}}].
314es_type_filter(_) -->
315 [].
321field_filter(regex(RE,Flags), Field) -->
322 !,
323 { convlist(re_pair, Flags, REOpts),
324 dict_pairs(REDict, #, [Field - REProps]),
325 dict_pairs(REProps, #, [value-RE|REOpts])
326 },
327 [ #{regexp: REDict} ].
328field_filter(String, Field) -->
329 { dict_pairs(WCDict, #, [Field - WCProps]),
330 dict_pairs(WCProps, #, [value-String, case_insensitive-true])
331 },
332 [ #{wildcard: WCDict} ].
333
334re_pair(i, case_insensitive-true).
338es_ordering(_OnContent, _OnMeta, Query, Field, Dir) :-
339 _{ order_by:Field, order: Dir} :< Query,
340 !.
341es_ordering(_OnContent, _OnMeta, Query, Field, Dir) :-
342 _{ order_by:Field} :< Query,
343 !,
344 ( Field == time
345 -> Dir = desc
346 ; Dir = asc
347 ).
348es_ordering(OnContent, _OnMeta, _Query, '_score', desc) :-
349 OnContent \== [],
350 !.
351es_ordering(_OnContent, _OnMeta, _Query, 'time', desc).
357es_to_swish(ESResult, #{ matches:Matches,
358 total:Total,
359 cpu:CPU,
360 cache:false
361 }) :-
362 Hits = ESResult.hits,
363 CPU is ESResult.get(took, 0)/1000.0,
364 Total = Hits.total.value,
365 maplist(es_to_swish_hit, Hits.hits, Matches).
366
367es_to_swish_hit(ESHit, Match) :-
368 dict_pairs(ESHit.fields, _, Pairs),
369 convlist(to_single, Pairs, RPairs),
370 add_highlight(ESHit, RPairs, RPairs1),
371 dict_pairs(Match, #, RPairs1).
372
373to_single(tags-Tags, tags-Tags) :- !.
374to_single(Field-[Value], Field-Value) :- !.
375
376add_highlight(ESHit, Pairs, [highlight-Lines|Pairs]) :-
377 Lines = ESHit.get(highlight).get(content),
378 !.
379add_highlight(_, Pairs, Pairs).
380
381
382 385
386:- multifile web_storage:typeahead_hooked/1. 387
388web_storage:typeahead_hooked(file).
389web_storage:typeahead_hooked(store_content).
390
391:- multifile swish_search:typeahead/4.
397swish_search:typeahead(file, For, FileInfo, _Options) :-
398 atom_concat(For, *, Pattern),
399 ESQuery = #{query:
400 #{bool:
401 #{ must:
402 [ #{query_string: #{
403 query: Pattern,
404 fields: [name, title, tags]
405 }}
406 ],
407 filter: [ #{term: #{public: true}} ]
408 }
409 },
410 '_source': false,
411 fields:
412 [ name, 413 time,
414 author,
415 avatar,
416 tags
417 ],
418 sort:[#{time: #{order:desc}}],
419 from: 0,
420 size: 10
421 },
422 es_query(ESQuery, Matches),
423 es_to_swish(Matches, Result),
424 member(Hit, Result.matches),
425 File = Hit.name,
426 FileInfo = Hit.put(_{type:"store", file:File}).
427
428swish_search:typeahead(store_content, Text, FileInfo, Options) :-
429 http_current_request(Request),
430 authenticate(Request, Auth),
431 limit(25, se_typeahead(store_content, Text, FileInfo,
432 Options.put(auth,Auth))).
433
434se_typeahead(store_content, Text, FileInfo, Options) :-
435 option(auth(Auth), Options),
436 phrase(es_owner_filter(#{auth:Auth.put(Options)}, [user("me")]), Filter),
437 Query = #{query:
438 #{bool:
439 #{ must:
440 [ #{match: #{content: Text}}
441 ],
442 filter: Filter
443 }
444 },
445 '_source': false, 446 fields:
447 [ name, 448 time,
449 author,
450 avatar,
451 tags
452 ],
453 track_total_hits: true, 454 highlight:
455 #{ fields:
456 #{ content:
457 #{ number_of_fragments: 1
458 }
459 },
460 encoder: html,
461 boundary_chars: '\n'
462 },
463 sort:[#{time: #{order:desc}}],
464 from:0, size:10
465 },
466 ( debugging(elastic)
467 -> print_term(Query, [nl(true)])
468 ; true
469 ),
470 es_query(Query, Matches),
471 es_to_swish(Matches, Result),
472 member(Hit, Result.matches),
473 File = Hit.name,
474 member(Line, Hit.highlight),
475 FileInfo = Hit.put(_{type:"store", file:File,
476 line: 0, text:Line, query:Text,
477 encoder: html
478 }).
479
480
481 484
490
493
494:- listen(swish(created(File, _Commit)),
495 es_add_file(File)). 496:- listen(swish(updated(File, _Commit)),
497 es_add_file(File)).
SWISH integration of Elastic Search
This module hooks into
storage.pl
to provide full text and metadata search over the file storage. This module listens to saved files to index new or updated files. */