./ct_report/coverage/mongoose_cluster.COVER.html

1 -module(mongoose_cluster).
2
3 %% This is a library module for cluster management: joining / leaving a cluster.
4
5 %% TODO: it might make sense to expose this stuff as mod_admin_extra_cluster
6
7 -export([join/1, leave/0, remove_from_cluster/1, is_node_alive/1]).
8
9 -export([all_cluster_nodes/0, other_cluster_nodes/0]).
10
11 -ignore_xref([all_cluster_nodes/0]).
12
13 -include("mongoose.hrl").
14
15 -dialyzer({[no_match, no_return], set_extra_db_nodes/1}).
16
17 %%
18 %% API
19 %%
20
%% @doc Make this node a member of the cluster that ClusterMember belongs to.
%% All current connections are dropped and all persistent Mnesia data
%% on this node is discarded. Use with caution!
%% On its next start the node connects to the other members automatically.
-spec join(node()) -> ok.
join(ClusterMember) ->
    JoinFun = fun() -> do_join(ClusterMember) end,
    node_trans(JoinFun).
28
%% Logs the operation, then performs the join while the mongooseim
%% application is stopped. The peer is pinged first, so an unreachable
%% ClusterMember aborts the join before unsafe_join/2 runs.
do_join(ClusterMember) ->
    ?LOG_NOTICE(#{what => cluster_join,
                  text => <<"Stop mongooseim to join the cluster">>,
                  member => ClusterMember}),
    JoinWhileStopped =
        fun() ->
                check_networking(ClusterMember),
                unsafe_join(node(), ClusterMember)
        end,
    with_app_stopped(mongooseim, JoinWhileStopped).
38
%% @doc Take this node out of its cluster.
%% All current connections are dropped and all persistent Mnesia data
%% on this node is discarded. Use with caution!
%% On its next start the node will NOT connect to the previous members.
%% The remaining members drop this node from the cluster Mnesia schema.
-spec leave() -> ok.
leave() ->
    LeaveFun = fun() -> do_leave() end,
    node_trans(LeaveFun).
47
%% Logs the operation, then, with the mongooseim application stopped:
%% stops Mnesia, asks the other db nodes to drop this node's schema copy,
%% wipes the local Mnesia directory and starts Mnesia again.
do_leave() ->
    ?LOG_NOTICE(#{what => cluster_leave,
                  text => <<"Stop mongooseim to leave the cluster">>}),
    LeaveWhileStopped =
        fun() ->
                %% Best effort: the outcome of stopping Mnesia is ignored.
                catch mnesia:stop(),
                PeerNodes = mnesia_nodes(),
                detach_nodes(PeerNodes),
                delete_mnesia(),
                ok = mnesia:start()
        end,
    with_app_stopped(mongooseim, LeaveWhileStopped).
58
%% @doc Remove a dead node from the cluster.
%% The node being removed must be down; otherwise the call fails with
%% `{node_is_alive, Node}'.
-spec remove_from_cluster(node()) -> ok.
remove_from_cluster(Node) ->
    RemoveFun = fun() -> do_remove_from_cluster(Node) end,
    node_trans(RemoveFun).
64
%% Refuses to remove a node that still answers pings; otherwise removes
%% its schema copy from the cluster.
do_remove_from_cluster(Node) ->
    case is_node_alive(Node) of
        true ->
            error({node_is_alive, Node});
        false ->
            remove_dead_from_cluster(Node)
    end.
69
%% @doc This node followed by every connected node running mongooseim.
-spec all_cluster_nodes() -> [node()].
all_cluster_nodes() ->
    Self = node(),
    Others = other_cluster_nodes(),
    [Self | Others].
73
%% @doc The connected nodes (as reported by nodes/0) on which the
%% mongooseim application is running.
-spec other_cluster_nodes() -> [node()].
other_cluster_nodes() ->
    [Node || Node <- nodes(), is_mongooseim_node(Node)].
77
78 %%
79 %% Helpers
80 %%
81
%% Deletes DeadNode's copy of the Mnesia schema table.
%% Returns ok on success; raises {del_table_copy_schema, Reason} when the
%% Mnesia operation is aborted.
remove_dead_from_cluster(DeadNode) ->
    ?LOG_INFO(#{what => cluster_remove_dead_node_from_cluster,
                text => <<"Removing dead member node from the cluster">>,
                member => DeadNode}),
    Result = mnesia:del_table_copy(schema, DeadNode),
    case Result of
        {aborted, Reason} ->
            error({del_table_copy_schema, Reason});
        {atomic, ok} ->
            ok
    end.
92
%% @doc Tell whether Node answers pings.
%% Returns true when check_networking/1 succeeds and false when it raises
%% an error (which is what it does once the ping retries are exhausted).
-spec is_node_alive(node()) -> boolean().
is_node_alive(Node) ->
    try check_networking(Node) of
        true ->
            true
    catch
        %% Any error raised while pinging is treated as "node is down".
        error:_ ->
            false
    end.
101
%% Tell whether the mongooseim application is running on Node.
%% The list of running applications is fetched from Node with rpc:call/4.
%% When the call fails (rpc returns {badrpc, Reason}, e.g. the node went
%% down in the meantime) the node is reported as not running mongooseim
%% instead of crashing the caller inside lists:keymember/3.
-spec is_mongooseim_node(node()) -> boolean().
is_mongooseim_node(Node) ->
    case rpc:call(Node, application, which_applications, []) of
        Apps when is_list(Apps) ->
            lists:keymember(mongooseim, 1, Apps);
        {badrpc, _Reason} ->
            false
    end.
106
%% True when App appears among the applications currently running on
%% this node.
is_app_running(App) ->
    RunningApps = application:which_applications(),
    lists:keyfind(App, 1, RunningApps) =/= false.
109
%% Returns true when ClusterMember answers a ping within the retry budget
%% of wait_for_pong/1; raises error `pang' otherwise.
check_networking(ClusterMember) ->
    case wait_for_pong(ClusterMember) of
        ok ->
            true;
        _NoPong ->
            error(pang, [ClusterMember])
    end.
112
%% Joins Node to ClusterMember's Mnesia cluster, destroying local data.
%% Steps, in order:
%%   1. wipe the local Mnesia directory and start Mnesia afresh;
%%   2. point Mnesia at ClusterMember and assert it is a running db node;
%%   3. switch the local schema copy to disc_copies;
%%   4. look up, on ClusterMember, the storage type of every non-schema table;
%%   5. add a local copy of each table with that storage type;
%%   6. verify every copy result, raising on the first failure.
unsafe_join(Node, ClusterMember) ->
    delete_mnesia(),
    ok = mnesia:start(),
    set_extra_db_nodes(ClusterMember),
    RunningDbNodes = mnesia:system_info(running_db_nodes),
    true = lists:member(ClusterMember, RunningDbNodes),
    ok = change_schema_type(Node),
    UserTables = [Name || Name <- mnesia:system_info(tables), Name /= schema],
    %% All storage types are resolved before any copy is added.
    TablesWithTypes =
        lists:map(fun(Name) -> {Name, table_type(ClusterMember, Name)} end,
                  UserTables),
    CopyResults =
        lists:map(fun({Name, Type} = Entry) ->
                          {Entry, mnesia:add_table_copy(Name, Node, Type)}
                  end,
                  TablesWithTypes),
    lists:foreach(fun check_if_successful_copied/1, CopyResults),
    ok.
126
%% Sets Mnesia's extra_db_nodes to [ClusterMember].
%% Returns ok only when Mnesia reports exactly that node back; any other
%% result raises an error map carrying the result and the member.
set_extra_db_nodes(ClusterMember) ->
    Result = mnesia:change_config(extra_db_nodes, [ClusterMember]),
    case Result of
        {ok, [ClusterMember]} ->
            ok;
        _ ->
            error(#{reason => set_extra_db_nodes_failed,
                    result => Result,
                    cluster_member => ClusterMember})
    end.
136
%% Validates one {Table, AddTableCopyResult} pair.
%% A successful copy and an already existing copy are both accepted;
%% anything else raises {add_table_copy_error, TableEl, TableEl}.
check_if_successful_copied({_, {atomic, ok}}) ->
    ok;
check_if_successful_copied({_, {aborted, {already_exists, _, _}}}) ->
    ok;
check_if_successful_copied(TableEl) ->
    error({add_table_copy_error, TableEl, TableEl}).
146
%% Switches Node's copy of the schema table to disc_copies.
%% Returns ok when the change succeeds or the copy already has that type,
%% and {error, Reason} for any other abort.
change_schema_type(Node) ->
    Outcome = mnesia:change_table_copy_type(schema, Node, disc_copies),
    case Outcome of
        {atomic, ok} -> ok;
        {aborted, {already_exists, _, _, _}} -> ok;
        {aborted, Reason} -> {error, Reason}
    end.
156
%% Asks ClusterMember for the storage type of Mnesia table T.
%% Returns disc_copies, ram_copies or disc_only_copies.
%% Raises {cant_get_storage_type, {T, Other}} when the remote call yields
%% anything else. rpc:call/4 reports failures as a {badrpc, Reason} return
%% value rather than an exception, so that case has to be matched here.
%% Raises {cant_get_storage_type, {T, Class, Reason}} when the call itself
%% throws.
table_type(ClusterMember, T) ->
    try rpc:call(ClusterMember, mnesia, table_info, [T, storage_type]) of
        Type when Type =:= disc_copies;
                  Type =:= ram_copies;
                  Type =:= disc_only_copies ->
            Type;
        Other ->
            %% e.g. {badrpc, _} or an unexpected storage type
            error({cant_get_storage_type, {T, Other}}, [ClusterMember, T])
    catch
        E:R ->
            error({cant_get_storage_type, {T, E, R}}, [ClusterMember, T])
    end.
165
%% This will remove all your Mnesia data!
%% You've been warned.
%%
%% Stops Mnesia (best effort), deletes the files under the directory
%% reported by mnesia:system_info(directory) and logs the deletion.
%% A notice is logged first when the `dir' application env of mnesia is
%% set and differs from that directory; the deletion still proceeds.
delete_mnesia() ->
    catch mnesia:stop(),
    Dir = mnesia:system_info(directory),
    AppEnvDir = application:get_env(mnesia, dir, undefined),
    case AppEnvDir =:= undefined orelse AppEnvDir =:= Dir of
        true ->
            %% Either nothing is configured or both settings match, OK!
            ok;
        false ->
            ?LOG_NOTICE(#{what => mnesia_configuration,
                          text => <<"mnesia:system_info(directory) and application:get_env(mnesia, dir) "
                                    "returned different paths. mnesia_dir and env_mnesia_dir are different.">>,
                          mnesia_dir => Dir, env_mnesia_dir => AppEnvDir})
    end,
    ok = rmrf(Dir),
    ?LOG_NOTICE(#{what => mnesia_deleted,
                  text => <<"Mnesia schema and files deleted.">>,
                  mnesia_dir => Dir}),
    ok.
188
%% Pings Node, retrying up to 5 more times with 100 ms between attempts.
%% Returns ok as soon as the node answers, timeout otherwise.
wait_for_pong(Node) ->
    FirstReply = net_adm:ping(Node),
    wait_for_pong(FirstReply, Node, 5, 100).

%% Retry loop: the first argument is the result of the latest ping,
%% Retries is the number of attempts left, Interval the pause in ms.
wait_for_pong(pong, _Node, _Retries, _Interval) ->
    ok;
wait_for_pong(pang, Node, Retries, Interval) ->
    case Retries of
        0 ->
            timeout;
        _ ->
            timer:sleep(Interval),
            NextReply = net_adm:ping(Node),
            wait_for_pong(NextReply, Node, Retries - 1, Interval)
    end.
199
%% Recursively deletes the files found under Dir.
%% A missing path is fine; a path that is not a directory is deleted as a
%% file; for a directory every entry is processed recursively. Note that
%% directories themselves are only emptied, never removed.
rmrf(Dir) ->
    Listing = file:list_dir(Dir),
    case Listing of
        {ok, Entries} ->
            lists:foreach(fun(Entry) ->
                                  ok = rmrf(filename:join(Dir, Entry))
                          end,
                          Entries);
        {error, enotdir} ->
            ok = file:delete(Dir);
        {error, enoent} ->
            ok
    end.
209
%% Asks every node in Nodes to delete this node's copy of the schema table.
%% Crashes with badmatch if any of the nodes could not be reached; the
%% individual replies are not inspected.
detach_nodes(Nodes) ->
    Outcome = rpc:multicall(Nodes, mnesia, del_table_copy, [schema, node()]),
    {_Replies, []} = Outcome.
213
%% The Mnesia db nodes known to this node, excluding this node itself.
mnesia_nodes() ->
    DbNodes = mnesia:system_info(db_nodes),
    lists:delete(node(), DbNodes).
216
%% Runs F() with application App stopped and returns F's result.
%% If App was running it is stopped first and started again afterwards,
%% even when F raises. If App was not running, F is simply called.
with_app_stopped(App, F) ->
    case is_app_running(App) of
        true ->
            application:stop(App),
            try
                F()
            after
                application:start(App)
            end;
        false ->
            F()
    end.
225
%% Runs F through global:trans/2 using a lock id made of the resource
%% {mongoose_cluster_op, node()} and the calling process as the requester.
node_trans(F) ->
    Resource = {mongoose_cluster_op, node()},
    LockId = {Resource, self()},
    global:trans(LockId, F).
Line Hits Source