34
35:- module(es_swish,
36 [ es_create_index/0,
37 es_add_file/1, 38 es_add/2, 39 es_add_since/1, 40 es_query/2
41 ]). 42:- use_module(elastic). 43:- use_module('../storage'). 44:- use_module('../authenticate'). 45:- use_module(library(base64)). 46:- use_module(library(broadcast)). 47:- use_module(library(solution_sequences)). 48:- use_module(library(apply)). 49:- use_module(library(pprint)). 50:- use_module(library(debug)). 51:- use_module(library(lists)). 52:- use_module(library(http/http_server)). 53
60
64
65es_create_index :-
66 commit_properties(Properties),
67 es_create_index(swish, Properties).
68
73
74es_add_since(Time) :-
75 get_time(Now),
76 Since is Now - Time,
77 forall(call_nth((storage_meta_data(File, Meta),
78 Meta.time > Since),
79 Nth),
80 es_add_file(File, 0, Nth)).
81
89
90es_add(Offset, Limit) :-
91 forall(call_nth(limit(Limit, offset(Offset, storage_file(File))), Nth),
92 es_add_file(File, Offset, Nth)).
93
94es_add_file(File, Offset, N) :-
95 Nth is Offset+N,
96 format(user_error, '\r~D ~w ... \e[K', [Nth, File]),
97 Error = error(_,_),
98 catch(es_add_file(File), Error,
99 print_message(warning, Error)).
100
107
108es_add_file(File) :-
109 storage_file(File, Data, Meta),
110 !,
111 file_name_extension(_Base, Ext, File),
112 atom_concat('p/', File, IdPath),
113 base64url(IdPath, Id0),
114 str_limit(Id0, 511, Id), 115 es_add(swish, Meta.put(_{ content: Data,
116 type:Ext
117 }), _Reply,
118 [ id(Id)
119 ]).
120es_add_file(File) :-
121 print_message(warning, gitty(no_file(File))).
122
123str_limit(Id, Limit, Id) :-
124 string_length(Id, Len),
125 Len =< Limit,
126 !.
127str_limit(Id0, Limit, Id) :-
128 sub_string(Id0, 0, Limit, _, Id).
129
133
134es_query(For, Result) :-
135 es_search(swish, For, Result).
136
137commit_properties(#{ name: #{type: keyword}, 138
139 author: #{type: keyword}, 140 avatar: #{type: keyword},
141 commit: #{type: keyword},
142 identity: #{type: keyword},
143 email: #{type: keyword},
144 profile_id: #{type: keyword},
145 peer: #{type: ip, ignore_malformed:true},
146
147 title: #{type: text}, 148 description: #{type: text},
149 commit_message: #{type: text},
150 type: #{type: keyword},
151 tags: #{type: keyword},
152 example: #{type: boolean},
153
154 data: #{type: keyword}, 155 previous: #{type: keyword},
156 content: #{type: text},
157 time: #{ type: date,
158 format:"epoch_second"
159 },
160 161 modify: #{type: keyword}, 162 public: #{type: boolean}
163 }).
164
165
166 169
170:- multifile
171 web_storage:search_sources_hook/2. 172
176
177web_storage:search_sources_hook(Query, Result) :-
178 ( catch_with_backtrace(es_search_sources_hook(Query, Result), E, true)
179 -> ( var(E)
180 -> true
181 ; message_to_string(E, Msg),
182 Result = #{ error: Msg },
183 ( debugging(elastic)
184 -> print_term(Result, [nl(true)])
185 ; true
186 )
187 )
188 ; Result = #{ error: "failed" }
189 ).
190
191es_search_sources_hook(Query, Result) :-
192 ( debugging(elastic)
193 -> print_term(Query, [nl(true)])
194 ; true
195 ),
196 partition(on_content, Query.query, OnContent, OnMeta),
197 phrase(es_filter(Query, OnMeta), Filter),
198 phrase(es_content(OnContent), Match),
199 es_ordering(OnContent, OnMeta, Query, Field, Dir),
200 dict_pairs(OrderBy, #, [Field - #{order:Dir}]),
201 ESQuery = #{query:
202 #{bool:
203 #{ must: Match,
204 filter: Filter
205 }
206 },
207 '_source': false, 208 fields:
209 [ name, 210 time,
211 author,
212 avatar,
213 tags
214 ],
215 track_total_hits: true, 216 highlight:
217 #{ fields:
218 #{ content: #{}}
219 },
220 sort: [
221 OrderBy
222 ],
223 from: Query.get(offset, 0),
224 size: Query.get(limit, 10)
225 },
226 ( debugging(elastic)
227 -> print_term(ESQuery, [nl(true)])
228 ; true
229 ),
230 es_query(ESQuery, Matches),
231 es_to_swish(Matches, Result).
232
233on_content(word(_)).
234on_content(string(_)).
235on_content(regex(_,_)).
236
240
241es_content([]) -->
242 [].
243es_content([H|T]) -->
244 es_content_1(H),
245 es_content(T).
246
247es_content_1(word(W)) -->
248 [ #{match: #{content: W}} ].
249es_content_1(string(W)) -->
250 [ #{match_phrase: #{content: W}} ].
251es_content_1(regex(RE,Flags)) -->
252 field_filter(regex(RE,Flags), content).
253
257
258es_filter(Query, OnMeta) -->
259 es_owner_filter(Query, OnMeta),
260 es_tag_filter(OnMeta),
261 es_type_filter(OnMeta),
262 es_name_filter(OnMeta).
263
264es_owner_filter(Query, OnMeta) --> 265 { memberchk(user("me"), OnMeta),
266 user_property(Query.auth, identity(Id))
267 },
268 !,
269 [ #{term: #{identity: Id}} ].
270es_owner_filter(Query, OnMeta) --> 271 { memberchk(user("me"), OnMeta),
272 NickName = Query.auth.get(display_name)
273 },
274 !,
275 field_filter(NickName, author),
276 public_filter.
277es_owner_filter(Query, OnMeta) --> 278 { memberchk(user("me"), OnMeta),
279 user_property(Query.auth, avatar(Avatar))
280 },
281 !,
282 [ #{term: #{avatar: Avatar}} ],
283 public_filter.
284es_owner_filter(_Query, OnMeta) -->
285 { memberchk(user(User), OnMeta),
286 User \== "me"
287 },
288 field_filter(User, author),
289 public_filter.
290es_owner_filter(_, _) -->
291 public_filter.
292
293public_filter -->
294 [ #{term: #{public: true}} ].
295
296es_tag_filter(Query) -->
297 { memberchk(tag(Tag), Query) },
298 !,
299 field_filter(Tag, tags).
300es_tag_filter(_) -->
301 [].
302
303es_name_filter(Query) -->
304 { memberchk(name(Name), Query) },
305 !,
306 field_filter(Name, tags).
307es_name_filter(_) -->
308 [].
309
310es_type_filter(Query) -->
311 { memberchk(type(Type), Query) },
312 !,
313 [ #{term: #{type: Type}}].
314es_type_filter(_) -->
315 [].
316
320
321field_filter(regex(RE,Flags), Field) -->
322 !,
323 { convlist(re_pair, Flags, REOpts),
324 dict_pairs(REDict, #, [Field - REProps]),
325 dict_pairs(REProps, #, [value-RE|REOpts])
326 },
327 [ #{regexp: REDict} ].
328field_filter(String, Field) -->
329 { dict_pairs(WCDict, #, [Field - WCProps]),
330 dict_pairs(WCProps, #, [value-String, case_insensitive-true])
331 },
332 [ #{wildcard: WCDict} ].
333
334re_pair(i, case_insensitive-true).
335
337
338es_ordering(_OnContent, _OnMeta, Query, Field, Dir) :-
339 _{ order_by:Field, order: Dir} :< Query,
340 !.
341es_ordering(_OnContent, _OnMeta, Query, Field, Dir) :-
342 _{ order_by:Field} :< Query,
343 !,
344 ( Field == time
345 -> Dir = desc
346 ; Dir = asc
347 ).
348es_ordering(OnContent, _OnMeta, _Query, '_score', desc) :-
349 OnContent \== [],
350 !.
351es_ordering(_OnContent, _OnMeta, _Query, 'time', desc).
352
356
357es_to_swish(ESResult, #{ matches:Matches,
358 total:Total,
359 cpu:CPU,
360 cache:false
361 }) :-
362 Hits = ESResult.hits,
363 CPU is ESResult.get(took, 0)/1000.0,
364 Total = Hits.total.value,
365 maplist(es_to_swish_hit, Hits.hits, Matches).
366
367es_to_swish_hit(ESHit, Match) :-
368 dict_pairs(ESHit.fields, _, Pairs),
369 convlist(to_single, Pairs, RPairs),
370 add_highlight(ESHit, RPairs, RPairs1),
371 dict_pairs(Match, #, RPairs1).
372
373to_single(tags-Tags, tags-Tags) :- !.
374to_single(Field-[Value], Field-Value) :- !.
375
376add_highlight(ESHit, Pairs, [highlight-Lines|Pairs]) :-
377 Lines = ESHit.get(highlight).get(content),
378 !.
379add_highlight(_, Pairs, Pairs).
380
381
382 385
386:- multifile web_storage:typeahead_hooked/1. 387
388web_storage:typeahead_hooked(file).
389web_storage:typeahead_hooked(store_content).
390
391:- multifile swish_search:typeahead/4. 392
396
397swish_search:typeahead(file, For, FileInfo, _Options) :-
398 atom_concat(For, *, Pattern),
399 ESQuery = #{query:
400 #{bool:
401 #{ must:
402 [ #{query_string: #{
403 query: Pattern,
404 fields: [name, title, tags]
405 }}
406 ],
407 filter: [ #{term: #{public: true}} ]
408 }
409 },
410 '_source': false,
411 fields:
412 [ name, 413 time,
414 author,
415 avatar,
416 tags
417 ],
418 sort:[#{time: #{order:desc}}],
419 from: 0,
420 size: 10
421 },
422 es_query(ESQuery, Matches),
423 es_to_swish(Matches, Result),
424 member(Hit, Result.matches),
425 File = Hit.name,
426 FileInfo = Hit.put(_{type:"store", file:File}).
427
428swish_search:typeahead(store_content, Text, FileInfo, Options) :-
429 http_current_request(Request),
430 authenticate(Request, Auth),
431 limit(25, se_typeahead(store_content, Text, FileInfo,
432 Options.put(auth,Auth))).
433
434se_typeahead(store_content, Text, FileInfo, Options) :-
435 option(auth(Auth), Options),
436 phrase(es_owner_filter(#{auth:Auth.put(Options)}, [user("me")]), Filter),
437 Query = #{query:
438 #{bool:
439 #{ must:
440 [ #{match: #{content: Text}}
441 ],
442 filter: Filter
443 }
444 },
445 '_source': false, 446 fields:
447 [ name, 448 time,
449 author,
450 avatar,
451 tags
452 ],
453 track_total_hits: true, 454 highlight:
455 #{ fields:
456 #{ content:
457 #{ number_of_fragments: 1
458 }
459 },
460 encoder: html,
461 boundary_chars: '\n'
462 },
463 sort:[#{time: #{order:desc}}],
464 from:0, size:10
465 },
466 ( debugging(elastic)
467 -> print_term(Query, [nl(true)])
468 ; true
469 ),
470 es_query(Query, Matches),
471 es_to_swish(Matches, Result),
472 member(Hit, Result.matches),
473 File = Hit.name,
474 member(Line, Hit.highlight),
475 FileInfo = Hit.put(_{type:"store", file:File,
476 line: 0, text:Line, query:Text,
477 encoder: html
478 }).
479
480
481 484
490
493
494:- listen(swish(created(File, _Commit)),
495 es_add_file(File)). 496:- listen(swish(updated(File, _Commit)),
497 es_add_file(File)).