1 |
|
%%------------------------------------------------------------------ |
2 |
|
%% Copyright 2018 Erlang Solutions Ltd. |
3 |
|
%% |
4 |
|
%% Licensed under the Apache License, Version 2.0 (the "License"); |
5 |
|
%% you may not use this file except in compliance with the License. |
6 |
|
%% You may obtain a copy of the License at |
7 |
|
%% |
8 |
|
%% http://www.apache.org/licenses/LICENSE-2.0 |
9 |
|
%% |
10 |
|
%% Unless required by applicable law or agreed to in writing, software |
11 |
|
%% distributed under the License is distributed on an "AS IS" BASIS, |
12 |
|
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 |
|
%% See the License for the specific language governing permissions and |
14 |
|
%% limitations under the License. |
15 |
|
%% |
16 |
|
%% @doc ElasticSearch backend for Message Archive Management for |
17 |
|
%% one-to-one messages. |
18 |
|
%%%------------------------------------------------------------------ |
19 |
|
-module(mod_mam_elasticsearch_arch). |
20 |
|
|
21 |
|
-behaviour(gen_mod). |
22 |
|
-behaviour(ejabberd_gen_mam_archive). |
23 |
|
-behaviour(mongoose_module_metrics). |
24 |
|
|
25 |
|
%% gen_mod callbacks |
26 |
|
-export([start/2]). |
27 |
|
-export([stop/1]). |
28 |
|
-export([hooks/1]). |
29 |
|
|
30 |
|
%% ejabberd_gen_mam_archive callbacks |
31 |
|
-export([archive_message/3]). |
32 |
|
-export([lookup_messages/3]). |
33 |
|
-export([remove_archive/3]). |
34 |
|
-export([archive_size/3]). |
35 |
|
-export([get_mam_pm_gdpr_data/3]). |
36 |
|
|
37 |
|
-include("mongoose.hrl"). |
38 |
|
-include("mongoose_rsm.hrl"). |
39 |
|
-include("mod_mam.hrl"). |
40 |
|
-include("jlib.hrl"). |
41 |
|
|
42 |
|
-define(INDEX_NAME, <<"messages">>). |
43 |
|
-define(TYPE_NAME, <<"pm">>). |
44 |
|
|
45 |
|
%%------------------------------------------------------------------- |
46 |
|
%% gen_mod callbacks |
47 |
|
%%------------------------------------------------------------------- |
48 |
|
|
49 |
|
%% @doc gen_mod callback run when the module is started for a host type.
%% Both arguments are ignored; the call performs no setup and returns `ok'.
-spec start(mongooseim:host_type(), gen_mod:module_opts()) -> ok.
start(_, _) -> ok.
52 |
|
|
53 |
|
%% @doc gen_mod callback run when the module is stopped for a host type.
%% The argument is ignored; the call performs no teardown and returns `ok'.
-spec stop(mongooseim:host_type()) -> ok.
stop(_) -> ok.
56 |
|
|
57 |
|
%% @doc Hook handler gathering the archived one-to-one messages of `Owner'
%% for GDPR data retrieval.
%%
%% Searches the index for documents whose `owner' field equals the bare JID of
%% `Owner', sorted by ascending `mam_id', converts every hit into a
%% `{MessageId, SourceJid, Packet}' tuple and puts those tuples in front of `Acc'.
%% Crashes with `badmatch' when the search does not return `{ok, _}' carrying
%% the expected `hits' structure.
%%
%% NOTE(review): the query carries no `size' field, so the number of returned
%% hits is presumably limited by the ElasticSearch default page size - confirm.
-spec get_mam_pm_gdpr_data(Acc, Params, Extra) -> {ok, Acc} when
    Acc :: ejabberd_gen_mam_archive:mam_pm_gdpr_data(),
    Params :: #{jid := jid:jid()},
    Extra :: gen_hook:extra().
get_mam_pm_gdpr_data(Acc, #{jid := Owner}, _Extra) ->
    OwnerFilter = #{term => #{owner => mod_mam_utils:bare_jid(Owner)}},
    Query = #{query => #{bool => #{filter => OwnerFilter}},
              sort => #{mam_id => #{order => asc}}},
    {ok, #{<<"hits">> := #{<<"hits">> := Hits}}} =
        mongoose_elasticsearch:search(?INDEX_NAME, ?TYPE_NAME, Query),
    {ok, [hit_to_gdpr_mam_message(Hit) || Hit <- Hits] ++ Acc}.
70 |
|
|
71 |
|
%%------------------------------------------------------------------- |
72 |
|
%% ejabberd_gen_mam_archive callbacks |
73 |
|
%%------------------------------------------------------------------- |
74 |
|
|
75 |
|
%% @doc Hook handler storing a single one-to-one message in ElasticSearch.
%%
%% The document is built from the bare owner and remote JIDs, the full source
%% JID, the serialised packet and the group-chat flag, and is inserted under an
%% id derived from the owner and the message id.
%%
%% Always returns an `{ok, _}' tuple so the hook chain continues:
%% `{ok, ok}' on success, `{ok, {error, Reason}}' when the insert fails. A failed
%% insert is also logged and increments the `modMamDropped' metric by one.
-spec archive_message(Acc, Params, Extra) -> {ok, Acc} when
    Acc :: ok | {error, term()},
    Params :: mod_mam:archive_message_params(),
    Extra :: gen_hook:extra().
archive_message(_Result,
                #{message_id := MessageId, local_jid := LocalJid,
                  remote_jid := RemoteJid, source_jid := SourceJid,
                  packet := Packet, is_groupchat := IsGroupChat},
                #{host_type := Host}) ->
    Owner = mod_mam_utils:bare_jid(LocalJid),
    Remote = mod_mam_utils:bare_jid(RemoteJid),
    Source = mod_mam_utils:full_jid(SourceJid),
    DocId = make_document_id(Owner, MessageId),
    GroupChatFlag = atom_to_binary(IsGroupChat),
    Doc = make_document(MessageId, Owner, Remote, Source, Packet, GroupChatFlag),
    Inserted = mongoose_elasticsearch:insert_document(?INDEX_NAME, ?TYPE_NAME, DocId, Doc),
    case Inserted of
        {error, Reason} = Failure ->
            ?LOG_ERROR(#{what => archive_message_failed,
                         user => Owner, server => Host, remote => Remote,
                         message_id => MessageId, reason => Reason}),
            mongoose_metrics:update(Host, modMamDropped, 1),
            {ok, Failure};
        {ok, _} ->
            {ok, ok}
    end.
103 |
|
|
104 |
|
%% @doc Hook handler looking up archived messages.
%%
%% When the request pages relative to a known message (RSM direction `before'
%% or `aft' together with a defined id) the lookup goes through
%% `lookup_message_page/4'; every other request is served directly by
%% `do_lookup_messages/3'. The outcome is wrapped in `{ok, _}' for the hook chain.
-spec lookup_messages(Acc, Params, Extra) -> {ok, Acc} when
    Acc :: {ok, mod_mam:lookup_result()} | {error, term()},
    Params :: mam_iq:lookup_params(),
    Extra :: gen_hook:extra().
lookup_messages(Result,
                #{rsm := #rsm_in{direction = Direction, id = ID} = RSM} = Params,
                #{host_type := HostType})
  when ID =/= undefined, (Direction =:= before orelse Direction =:= aft) ->
    {ok, lookup_message_page(Result, HostType, RSM, Params)};
lookup_messages(Result, Params, #{host_type := HostType}) ->
    {ok, do_lookup_messages(Result, HostType, Params)}.
120 |
|
|
121 |
|
%% Looks up one page relative to an RSM id.
%% The search is run with `page_size' increased by one. Errors are passed
%% through unchanged. When no specific `message_id' was requested, the result is
%% handed to `mod_mam_utils:check_for_item_not_found/3' together with the
%% original page size; otherwise it is returned as is.
lookup_message_page(AccResult, Host, #rsm_in{} = RSM, #{message_id := MsgID} = Params) ->
    PageSize = maps:get(page_size, Params),
    Widened = Params#{page_size := PageSize + 1},
    case {do_lookup_messages(AccResult, Host, Widened), MsgID} of
        {{error, _} = Err, _} ->
            Err;
        {{ok, LookupResult}, undefined} ->
            mod_mam_utils:check_for_item_not_found(RSM, PageSize, LookupResult);
        {{ok, _} = Found, _} ->
            Found
    end.
133 |
|
|
134 |
|
%% Builds the search query for `Params', adds sorting on `mam_id', the result
%% limit (`page_size') and, if applicable, the `from' offset, then runs it.
%% Returns `{ok, LookupResult}' or the `{error, Reason}' from ElasticSearch;
%% the error is logged together with the lookup parameters.
do_lookup_messages(_Result, Host, Params) ->
    BaseQuery = build_search_query(Params),
    SortOrder = determine_sorting(Params),
    Limit = maps:get(page_size, Params),
    SizedQuery = BaseQuery#{sort => [#{mam_id => #{order => SortOrder}}],
                            size => Limit},
    FinalQuery = maybe_add_from_constraint(SizedQuery, Params),
    case mongoose_elasticsearch:search(?INDEX_NAME, ?TYPE_NAME, FinalQuery) of
        {error, Reason} = Failure ->
            ?LOG_ERROR(maps:merge(Params,
                                  #{what => lookup_messages_failed,
                                    server => Host, reason => Reason})),
            Failure;
        {ok, Found} ->
            {ok, search_result_to_mam_lookup_result(Found, Params)}
    end.
150 |
|
|
151 |
|
%% @doc Hook handler returning the number of archived messages owned by
%% `OwnerJid'. The incoming accumulator is discarded and replaced by the count
%% obtained from `archive_size/1'.
-spec archive_size(Acc, Params, Extra) -> {ok, Acc} when
    Acc :: integer(),
    Params :: #{archive_id := mod_mam:archive_id() | undefined, owner := jid:jid()},
    Extra :: gen_hook:extra().
archive_size(_Size, #{owner := OwnerJid}, _Extra) ->
    Count = archive_size(build_search_query(#{owner_jid => OwnerJid})),
    {ok, Count}.
158 |
|
|
159 |
|
%% @doc Hook handler deleting every archived message owned by `OwnerJid'.
%% The accumulator is passed through untouched; the outcome of the deletion
%% itself is not reflected in the return value.
-spec remove_archive(Acc, Params, Extra) -> {ok, Acc} when
    Acc :: term(),
    Params :: #{archive_id := mod_mam:archive_id() | undefined, owner := jid:jid()},
    Extra :: gen_hook:extra().
remove_archive(Acc, #{owner := OwnerJid}, #{host_type := HostType}) ->
    _ = remove_archive(HostType, OwnerJid),
    {ok, Acc}.
166 |
|
|
167 |
|
%% Issues a delete-by-query for all documents owned by `OwnerJid'.
%% Best effort: a failure is logged and `ok' is returned regardless.
remove_archive(Host, OwnerJid) ->
    Query = build_search_query(#{owner_jid => OwnerJid}),
    Outcome = mongoose_elasticsearch:delete_by_query(?INDEX_NAME, ?TYPE_NAME, Query),
    case Outcome of
        {error, Reason} ->
            ?LOG_ERROR(#{what => remove_archive_failed,
                         server => Host, user_jid => OwnerJid, reason => Reason}),
            ok;
        ok ->
            ok
    end.
177 |
|
|
178 |
|
%%------------------------------------------------------------------- |
179 |
|
%% Helpers |
180 |
|
%%------------------------------------------------------------------- |
181 |
|
|
182 |
|
%% @doc gen_mod callback listing the hook handlers of this module.
%% Every handler is registered for `Host' with empty extra data and priority 50.
-spec hooks(mongooseim:host_type()) -> gen_hook:hook_list().
hooks(Host) ->
    Handlers = [{mam_archive_message, fun ?MODULE:archive_message/3},
                {mam_lookup_messages, fun ?MODULE:lookup_messages/3},
                {mam_archive_size, fun ?MODULE:archive_size/3},
                {mam_remove_archive, fun ?MODULE:remove_archive/3},
                {get_mam_pm_gdpr_data, fun ?MODULE:get_mam_pm_gdpr_data/3}],
    [{HookName, Host, Handler, #{}, 50} || {HookName, Handler} <- Handlers].
189 |
|
|
190 |
|
%% Builds the document id: the owner, a `$' separator and the decimal
%% representation of the message id.
-spec make_document_id(binary(), mod_mam:message_id()) -> binary().
make_document_id(Owner, MessageId) ->
    IdBin = integer_to_binary(MessageId),
    <<Owner/binary, "$", IdBin/binary>>.
193 |
|
|
194 |
|
%% Assembles the map stored as the ElasticSearch document for one message.
%% `message' holds the serialised packet, `body' the character data of the
%% packet's `body' child element as returned by `exml_query:path/2'.
-spec make_document(mod_mam:message_id(), binary(), binary(),
                    binary(), exml:element(), binary()) ->
    map().
make_document(MessageId, Owner, Remote, SourceBinJid, Packet, IsGroupChat) ->
    Serialised = exml:to_binary(Packet),
    Body = exml_query:path(Packet, [{element, <<"body">>}, cdata]),
    #{mam_id => MessageId,
      owner => Owner,
      remote => Remote,
      source_jid => SourceBinJid,
      message => Serialised,
      body => Body,
      is_groupchat => IsGroupChat}.
206 |
|
|
207 |
|
%% Builds the `bool' query for `Params': the text search (or match-all) goes
%% into `must', the list of term/range filters into `filter'.
-spec build_search_query(map()) -> mongoose_elasticsearch:query().
build_search_query(Params) ->
    Filters = build_filters(Params),
    Must = build_text_search_query(Params),
    BoolQuery = #{must => Must, filter => Filters},
    #{query => #{bool => BoolQuery}}.
215 |
|
|
216 |
|
%% Runs every filter builder against `Params' and concatenates the resulting
%% lists in builder order. A builder contributes `[]' when it does not apply.
-spec build_filters(map()) -> [map()].
build_filters(Params) ->
    Builders = [fun owner_filter/1,
                fun with_jid_filter/1,
                fun is_groupchat_filter/1,
                fun specific_message_filter/1,
                fun range_filter/1],
    lists:append([Build(Params) || Build <- Builders]).
224 |
|
|
225 |
|
%% Term filter on the `owner' field using the bare JID of the archive owner.
%% There is no fallback clause: `owner_jid' must be present in the parameters.
-spec owner_filter(map()) -> [map()].
owner_filter(#{owner_jid := Owner}) ->
    [#{term => #{owner => mod_mam_utils:bare_jid(Owner)}}].
229 |
|
|
230 |
|
%% Term filter on the `remote' field, produced only when `with_jid' holds a
%% `#jid{}' record; the bare form of that JID is used. Otherwise no filter.
-spec with_jid_filter(map()) -> [map()].
with_jid_filter(#{with_jid := #jid{} = Jid}) ->
    Remote = mod_mam_utils:bare_jid(Jid),
    [#{term => #{remote => Remote}}];
with_jid_filter(_Params) ->
    [].
235 |
|
|
236 |
|
%% When `include_groupchat' is `false', restricts the search to documents whose
%% `is_groupchat' field is the binary `<<"false">>'. Otherwise no filter.
-spec is_groupchat_filter(map()) -> [map()].
is_groupchat_filter(#{include_groupchat := false}) ->
    NotGroupChat = #{is_groupchat => <<"false">>},
    [#{term => NotGroupChat}];
is_groupchat_filter(_Params) ->
    [].
241 |
|
|
242 |
|
%% Term filter on `mam_id', produced only when `message_id' is an integer.
%% Otherwise no filter.
-spec specific_message_filter(map()) -> [map()].
specific_message_filter(#{message_id := MessageId}) when is_integer(MessageId) ->
    ExactId = #{mam_id => MessageId},
    [#{term => ExactId}];
specific_message_filter(_Params) ->
    [].
247 |
|
|
248 |
|
%% Range filter on `mam_id'. The bounds come from
%% `mod_mam_utils:calculate_msg_id_borders/4'; a bound that is `undefined' is
%% left out. When neither bound is set, or when any of `end_ts', `start_ts',
%% `borders' or `rsm' is missing from the parameters, no filter is produced.
-spec range_filter(map()) -> [map()].
range_filter(#{end_ts := End, start_ts := Start, borders := Borders, rsm := RSM}) ->
    {StartId, EndId} = mod_mam_utils:calculate_msg_id_borders(RSM, Borders, Start, End),
    case maybe_add_start_filter(StartId, maybe_add_end_filter(EndId, #{})) of
        Bounds when map_size(Bounds) =:= 0 ->
            [];
        Bounds ->
            [#{range => #{mam_id => Bounds}}]
    end;
range_filter(_Params) ->
    [].
261 |
|
|
262 |
|
%% Adds the inclusive upper bound to a range filter map.
%% `undefined' means "no upper bound" and leaves the map untouched.
%%
%% The bound is emitted as `lte', the documented name of the
%% "less than or equal" parameter of the ElasticSearch range query.
%% NOTE(review): the previous key `le' is not among the documented parameters
%% (`gt', `gte', `lt', `lte'); it appears to have been accepted only as a
%% legacy alias by older ElasticSearch releases - confirm against the
%% deployed version.
-spec maybe_add_end_filter(undefined | mod_mam:message_id(), map()) -> map().
maybe_add_end_filter(undefined, RangeMap) ->
    RangeMap;
maybe_add_end_filter(Value, RangeMap) ->
    RangeMap#{lte => Value}.
267 |
|
|
268 |
|
%% Adds the inclusive lower bound to a range filter map.
%% `undefined' means "no lower bound" and leaves the map untouched.
%%
%% The bound is emitted as `gte', the documented name of the
%% "greater than or equal" parameter of the ElasticSearch range query.
%% NOTE(review): the previous key `ge' is not among the documented parameters
%% (`gt', `gte', `lt', `lte'); it appears to have been accepted only as a
%% legacy alias by older ElasticSearch releases - confirm against the
%% deployed version.
-spec maybe_add_start_filter(undefined | mod_mam:message_id(), map()) -> map().
maybe_add_start_filter(undefined, RangeMap) ->
    RangeMap;
maybe_add_start_filter(Value, RangeMap) ->
    RangeMap#{gte => Value}.
273 |
|
|
274 |
|
%% Builds the `must' part of the query. With a binary `search_text' this is a
%% `simple_query_string' over the `body' field using the `and' default
%% operator; in every other case it is a `match_all' query.
-spec build_text_search_query(map()) -> map().
build_text_search_query(#{search_text := Text}) when is_binary(Text) ->
    Options = #{query => Text,
                fields => [<<"body">>],
                default_operator => <<"and">>},
    #{simple_query_string => Options};
build_text_search_query(_Params) ->
    #{match_all => #{}}.
281 |
|
|
282 |
|
%% Sort order for the search: descending when paging backwards
%% (RSM direction `before'), ascending in all other cases.
-spec determine_sorting(map()) -> asc | desc.
determine_sorting(#{rsm := #rsm_in{direction = before}}) -> desc;
determine_sorting(_Params) -> asc.
287 |
|
|
288 |
|
%% Sets the `from' offset of the query when the RSM data carries an integer
%% `index'; otherwise the query is returned unchanged.
-spec maybe_add_from_constraint(mongoose_elasticsearch:query(), map()) ->
    mongoose_elasticsearch:query().
maybe_add_from_constraint(Query, #{rsm := #rsm_in{index = Index}}) when is_integer(Index) ->
    maps:put(from, Index, Query);
maybe_add_from_constraint(Query, _Params) ->
    Query.
294 |
|
|
295 |
|
%% Converts a raw ElasticSearch search result into a MAM lookup result
%% `{TotalCount, Offset, Messages}'.
%%
%% The hits are converted to message rows and sorted with `lists:sort/1'.
%% For a "simple" lookup (`is_simple' is `true') neither the count nor the
%% offset is computed and both are `undefined'. `is_simple' must be present in
%% `Params'.
-spec search_result_to_mam_lookup_result(map(), map()) -> mod_mam:lookup_result().
search_result_to_mam_lookup_result(Result, Params) ->
    #{<<"hits">> := #{<<"hits">> := Hits, <<"total">> := RawTotal}} = Result,
    Messages = lists:sort([hit_to_mam_message(Hit) || Hit <- Hits]),
    case maps:get(is_simple, Params) of
        true ->
            {undefined, undefined, Messages};
        _NotSimple ->
            %% The reported total is corrected for RSM, while the offset is
            %% derived from the uncorrected total returned by the search.
            Total = corrected_total_count(RawTotal, Params),
            Offset = calculate_offset(RawTotal, length(Messages), Params),
            {Total, Offset, Messages}
    end.
313 |
|
|
314 |
|
%% Turns one search hit into a MAM message row: the stored `mam_id', the
%% source JID parsed with `jid:from_binary/1' and the stored packet parsed back
%% into an XML element. Crashes if the stored packet cannot be parsed.
-spec hit_to_mam_message(map()) -> mod_mam:message_row().
hit_to_mam_message(#{<<"_source">> := Source}) ->
    Id = maps:get(<<"mam_id">>, Source),
    RawPacket = maps:get(<<"message">>, Source),
    Sender = maps:get(<<"source_jid">>, Source),
    {ok, Stanza} = exml:parse(RawPacket),
    #{id => Id, jid => jid:from_binary(Sender), packet => Stanza}.
321 |
|
|
322 |
|
%% Turns one search hit into a GDPR export tuple
%% `{MessageIdAsBinary, SourceJid, Packet}'. The JID and the packet are
%% returned exactly as stored, without parsing.
hit_to_gdpr_mam_message(#{<<"_source">> := Source}) ->
    Id = maps:get(<<"mam_id">>, Source),
    RawPacket = maps:get(<<"message">>, Source),
    Sender = maps:get(<<"source_jid">>, Source),
    {integer_to_binary(Id), Sender, RawPacket}.
327 |
|
|
328 |
|
%% Paging with RSM changes the `"total"' value reported by ElasticSearch,
%% whereas the RSM spec expects the count to describe the whole result set,
%% which for MAM is limited by the MAM filters only.
%% Therefore, when the request pages relative to an integer RSM id, the size
%% is recomputed with a separate count query built as if RSM was not used.
%% This is inherently racy: a message may be archived between the original
%% search and the count request issued here.
-spec corrected_total_count(non_neg_integer(), mongoose_elasticsearch:query()) ->
    non_neg_integer().
corrected_total_count(_Reported, #{rsm := #rsm_in{id = Id}} = Params) when is_integer(Id) ->
    WithoutRsm = Params#{rsm := undefined},
    archive_size(build_search_query(WithoutRsm));
corrected_total_count(Reported, _Params) ->
    Reported.
341 |
|
|
342 |
|
%% Computes the offset of the returned page within the whole result set:
%%  - index-based paging (no direction, integer index): the index itself;
%%  - paging backwards: the reported total minus the number of returned messages;
%%  - paging forwards from an integer id: the number of documents matched by a
%%    count query built without RSM and with the borders' `to_id' set to
%%    `Id + 1';
%%  - anything else: 0.
%% NOTE(review): how `to_id' is turned into a range bound is decided by
%% `mod_mam_utils:calculate_msg_id_borders/4', which is not visible here -
%% confirm that `Id + 1' yields the intended count.
-spec calculate_offset(non_neg_integer(), non_neg_integer(), map()) -> non_neg_integer().
calculate_offset(_Total, _Count, #{rsm := #rsm_in{direction = undefined, index = Index}})
  when is_integer(Index) ->
    Index;
calculate_offset(Total, PageCount, #{rsm := #rsm_in{direction = before}}) ->
    Total - PageCount;
calculate_offset(_Total, _Count, #{rsm := #rsm_in{direction = aft, id = Id}} = Params)
  when is_integer(Id) ->
    WithoutRsm = Params#{rsm := undefined},
    Bounded = update_borders(WithoutRsm, Id + 1),
    archive_size(build_search_query(Bounded));
calculate_offset(_Total, _Count, _Params) ->
    0.
354 |
|
|
355 |
|
%% Replaces the `borders' entry of the parameters with borders whose `to_id'
%% is `EndId'. The `borders' key must already be present.
-spec update_borders(map(), non_neg_integer()) -> map().
update_borders(#{borders := Current} = Params, EndId) ->
    Updated = update_borders_to_id(Current, EndId),
    Params#{borders := Updated}.
358 |
|
|
359 |
|
%% Sets `to_id' on the given borders record. `undefined' borders are treated
%% as a record with default field values.
-spec update_borders_to_id(#mam_borders{} | undefined, non_neg_integer()) -> #mam_borders{}.
update_borders_to_id(undefined, EndId) ->
    update_borders_to_id(#mam_borders{}, EndId);
update_borders_to_id(Borders, EndId) ->
    Borders#mam_borders{to_id = EndId}.
364 |
|
|
365 |
|
%% Returns the number of documents matching `Query'.
%% A failed count request is logged and reported as 0 rather than as an error.
-spec archive_size(mongoose_elasticsearch:query()) -> non_neg_integer().
archive_size(Query) ->
    Counted = mongoose_elasticsearch:count(?INDEX_NAME, ?TYPE_NAME, Query),
    case Counted of
        {error, Reason} ->
            ?LOG_ERROR(#{what => archive_size_failed, reason => Reason, es_query => Query,
                         text => <<"Failed to retrieve count of messages from ElasticSearch">>}),
            0;
        {ok, Count} ->
            Count
    end.