./ct_report/coverage/mongoose_cluster.COVER.html

1 -module(mongoose_cluster).
2
3 %% This is a library module for cluster management: joining / leaving a cluster.
4
5 -export([join/1, leave/0, remove_from_cluster/1, is_node_alive/1]).
6
7 -export([all_cluster_nodes/0, other_cluster_nodes/0]).
8
9 -ignore_xref([all_cluster_nodes/0]).
10
11 -include("mongoose.hrl").
12
13 -dialyzer({[no_match, no_return], set_extra_db_nodes/1}).
14
15 %%
16 %% API
17 %%
18
%% @doc Make this node a member of the cluster that ClusterMember belongs to.
%% All current connections are dropped and all persistent Mnesia data
%% held by this node is discarded. Use with caution!
%% On subsequent starts the node connects to the other members automatically.
-spec join(node()) -> ok.
join(ClusterMember) ->
    JoinOp = fun() -> do_join(ClusterMember) end,
    node_trans(JoinOp).
26
%% Logs the join attempt, then, with the mongooseim application stopped,
%% verifies that ClusterMember is reachable and joins its Mnesia cluster.
do_join(ClusterMember) ->
    ?LOG_NOTICE(#{what => cluster_join,
                  text => <<"Stop mongooseim to join the cluster">>,
                  member => ClusterMember}),
    JoinWhileStopped =
        fun() ->
            check_networking(ClusterMember),
            unsafe_join(node(), ClusterMember)
        end,
    with_app_stopped(mongooseim, JoinWhileStopped).
36
%% @doc Take this node out of its cluster.
%% All current connections are dropped and all persistent Mnesia data
%% held by this node is discarded. Use with caution!
%% On subsequent starts the node will NOT connect to its previous members.
%% The remaining members remove this node from the cluster Mnesia schema.
-spec leave() -> ok.
leave() ->
    LeaveOp = fun() -> do_leave() end,
    node_trans(LeaveOp).
45
%% Logs the leave attempt, then, with the mongooseim application stopped:
%% stops Mnesia, asks the other db nodes to drop this node's schema copy,
%% wipes the local Mnesia directory and starts a fresh Mnesia.
do_leave() ->
    ?LOG_NOTICE(#{what => cluster_leave,
                  text => <<"Stop mongooseim to leave the cluster">>}),
    LeaveWhileStopped =
        fun() ->
            catch mnesia:stop(),
            RemoteNodes = mnesia_nodes(),
            detach_nodes(RemoteNodes),
            delete_mnesia(),
            ok = mnesia:start()
        end,
    with_app_stopped(mongooseim, LeaveWhileStopped).
56
%% @doc Remove a dead node from the cluster.
%% The node being removed must be down; an error is raised if it still
%% answers pings.
-spec remove_from_cluster(node()) -> ok.
remove_from_cluster(Node) ->
    RemoveOp = fun() -> do_remove_from_cluster(Node) end,
    node_trans(RemoveOp).
62
%% Refuses to remove a node that is still reachable; otherwise deletes it
%% from the cluster schema.
do_remove_from_cluster(Node) ->
    case is_node_alive(Node) of
        true ->
            error({node_is_alive, Node});
        false ->
            remove_dead_from_cluster(Node)
    end.
67
%% Returns this node followed by every other connected MongooseIM node.
-spec all_cluster_nodes() -> [node()].
all_cluster_nodes() ->
    Others = other_cluster_nodes(),
    [node() | Others].
71
%% Returns the connected nodes (as listed by nodes/0) that run the
%% mongooseim application.
-spec other_cluster_nodes() -> [node()].
other_cluster_nodes() ->
    [Node || Node <- nodes(), is_mongooseim_node(Node)].
75
76 %%
77 %% Helpers
78 %%
79
%% Deletes DeadNode's copy of the schema table, which removes the node from
%% the Mnesia cluster. Raises {del_table_copy_schema, Reason} if Mnesia
%% aborts the operation.
remove_dead_from_cluster(DeadNode) ->
    ?LOG_INFO(#{what => cluster_remove_dead_node_from_cluster,
                text => <<"Removing dead member node from the cluster">>,
                member => DeadNode}),
    Outcome = mnesia:del_table_copy(schema, DeadNode),
    case Outcome of
        {atomic, ok} ->
            ok;
        {aborted, Reason} ->
            error({del_table_copy_schema, Reason})
    end.
90
%% @doc Tell whether Node answers pings.
%% Returns true when check_networking/1 succeeds and false when it gives up
%% with the `pang' error. Any other error is deliberately NOT treated as
%% "node is dead": callers such as remove_from_cluster/1 act destructively
%% on a false result, so an unrelated failure must surface instead of being
%% mistaken for a dead node.
-spec is_node_alive(node()) -> boolean().
is_node_alive(Node) ->
    try check_networking(Node) of
        true ->
            true
    catch
        error:pang ->
            false
    end.
99
%% Returns true when the mongooseim application is running on Node.
%% rpc:call/4 reports an unreachable or failing node as {badrpc, Reason}
%% rather than raising; such a node is not a usable cluster member, so it
%% yields false instead of crashing lists:keymember/3 with a non-list.
-spec is_mongooseim_node(node()) -> boolean().
is_mongooseim_node(Node) ->
    case rpc:call(Node, application, which_applications, []) of
        Apps when is_list(Apps) ->
            lists:keymember(mongooseim, 1, Apps);
        {badrpc, _Reason} ->
            false
    end.
104
%% Returns true when App appears in the list of applications currently
%% running on this node.
is_app_running(App) ->
    RunningApps = application:which_applications(),
    case lists:keyfind(App, 1, RunningApps) of
        false -> false;
        _Entry -> true
    end.
107
%% Returns true once ClusterMember has answered a ping; raises the `pang'
%% error when it has not answered after all retries.
check_networking(ClusterMember) ->
    case wait_for_pong(ClusterMember) of
        ok ->
            true;
        _NoPong ->
            error(pang, [ClusterMember])
    end.
110
%% Joins Node to the Mnesia cluster of ClusterMember. "Unsafe" because it
%% starts by deleting the local Mnesia data. Steps, in order:
%%   1. wipe local Mnesia and start it fresh,
%%   2. point Mnesia at ClusterMember and assert that it is a running db node,
%%   3. turn the local schema into disc_copies,
%%   4. add a local copy of every non-schema table, using the storage type
%%      the table has on ClusterMember.
unsafe_join(Node, ClusterMember) ->
    delete_mnesia(),
    ok = mnesia:start(),
    set_extra_db_nodes(ClusterMember),
    RunningDbNodes = mnesia:system_info(running_db_nodes),
    true = lists:member(ClusterMember, RunningDbNodes),
    ok = change_schema_type(Node),
    AllTables = mnesia:system_info(tables),
    TypedTables = [{Name, table_type(ClusterMember, Name)}
                   || Name <- AllTables, Name /= schema],
    CopyResults = [{Entry, mnesia:add_table_copy(Name, Node, StorageType)}
                   || {Name, StorageType} = Entry <- TypedTables],
    lists:foreach(fun check_if_successful_copied/1, CopyResults),
    ok.
124
%% Sets ClusterMember as Mnesia's only extra db node. Succeeds only when
%% Mnesia reports exactly that node back; any other result raises an error
%% map describing the failure.
set_extra_db_nodes(ClusterMember) ->
    Expected = {ok, [ClusterMember]},
    case mnesia:change_config(extra_db_nodes, [ClusterMember]) of
        Expected ->
            ok;
        Unexpected ->
            error(#{reason => set_extra_db_nodes_failed,
                    result => Unexpected,
                    cluster_member => ClusterMember})
    end.
134
%% Validates one {Table, AddTableCopyResult} pair. A committed copy and an
%% "already exists" abort both count as success; anything else raises
%% add_table_copy_error carrying the offending pair.
check_if_successful_copied({_Table, {atomic, ok}}) ->
    ok;
check_if_successful_copied({_Table, {aborted, {already_exists, _, _}}}) ->
    ok;
check_if_successful_copied(Failed) ->
    error({add_table_copy_error, Failed, Failed}).
144
%% Switches the schema table on Node to disc_copies. Returns ok when the
%% change is committed or the schema already has that type, and
%% {error, Reason} for any other abort.
change_schema_type(Node) ->
    Outcome = mnesia:change_table_copy_type(schema, Node, disc_copies),
    case Outcome of
        {atomic, ok} ->
            ok;
        {aborted, {already_exists, _, _, _}} ->
            ok;
        {aborted, Reason} ->
            {error, Reason}
    end.
154
%% Returns the storage type (disc_copies | ram_copies | disc_only_copies)
%% that table T has on ClusterMember.
%% rpc:call/4 does not raise on failure: an unreachable node or a failing
%% remote call comes back as {badrpc, Reason}. Any result that is not a known
%% storage type is therefore turned into a cant_get_storage_type error here,
%% instead of surfacing as an opaque try_clause error.
table_type(ClusterMember, T) ->
    case rpc:call(ClusterMember, mnesia, table_info, [T, storage_type]) of
        Type when Type =:= disc_copies;
                  Type =:= ram_copies;
                  Type =:= disc_only_copies ->
            Type;
        Other ->
            error({cant_get_storage_type, {T, error, Other}}, [ClusterMember, T])
    end.
163
%% Stops Mnesia and deletes every file under its directory.
%% This destroys all of your Mnesia data - you have been warned.
%% The directory deleted is the one reported by mnesia:system_info/1; if the
%% `dir' application environment setting points somewhere else, a notice is
%% logged but the deletion still proceeds.
delete_mnesia() ->
    catch mnesia:stop(),
    Dir = mnesia:system_info(directory),
    EnvDir = application:get_env(mnesia, dir, undefined),
    if
        EnvDir =:= undefined ->
            ok;
        EnvDir =:= Dir ->
            %% Both settings agree, nothing to report.
            ok;
        true ->
            ?LOG_NOTICE(#{what => mnesia_configuration,
                          text => <<"mnesia:system_info(directory) and application:get_env(mnesia, dir) "
                                    "returned different paths. mnesia_dir and env_mnesia_dir are different.">>,
                          mnesia_dir => Dir, env_mnesia_dir => EnvDir}),
            ok
    end,
    ok = rmrf(Dir),
    ?LOG_NOTICE(#{what => mnesia_deleted,
                  text => <<"Mnesia schema and files deleted.">>,
                  mnesia_dir => Dir}),
    ok.
186
%% Pings Node, retrying up to 5 more times at 100 ms intervals.
%% Returns ok on the first pong, or timeout when every attempt fails.
wait_for_pong(Node) ->
    FirstReply = net_adm:ping(Node),
    wait_for_pong(FirstReply, Node, 5, 100).
189
%% Retry loop behind wait_for_pong/1. PingReply is the result of the most
%% recent ping; RetriesLeft is how many more pings may still be sent, each
%% one preceded by a sleep of Interval milliseconds.
wait_for_pong(PingReply, Node, RetriesLeft, Interval) ->
    case {PingReply, RetriesLeft} of
        {pong, _} ->
            ok;
        {pang, 0} ->
            timeout;
        {pang, _} ->
            timer:sleep(Interval),
            NextReply = net_adm:ping(Node),
            wait_for_pong(NextReply, Node, RetriesLeft - 1, Interval)
    end.
197
%% Recursively deletes the files under Path. A missing path is fine, a
%% non-directory is deleted, and a directory has each of its entries
%% processed in turn. Directories themselves are left in place; only the
%% files inside them are deleted.
rmrf(Path) ->
    case file:list_dir(Path) of
        {ok, Entries} ->
            lists:foreach(fun(Entry) ->
                                  ok = rmrf(filename:join(Path, Entry))
                          end, Entries),
            ok;
        {error, enotdir} ->
            ok = file:delete(Path);
        {error, enoent} ->
            ok
    end.
207
%% Asks every node in Nodes to delete this node's copy of the schema table.
%% Crashes with badmatch if any of the nodes could not be reached.
detach_nodes(Nodes) ->
    ThisNode = node(),
    Result = rpc:multicall(Nodes, mnesia, del_table_copy, [schema, ThisNode]),
    {_Replies, []} = Result.
211
%% Returns the Mnesia db nodes other than this node.
mnesia_nodes() ->
    DbNodes = mnesia:system_info(db_nodes),
    lists:delete(node(), DbNodes).
214
%% Runs F() with application App stopped and returns F's result.
%% If App was running beforehand it is stopped first and started again
%% afterwards, even when F raises; if it was not running it is left alone.
with_app_stopped(App, F) ->
    WasRunning = is_app_running(App),
    case WasRunning of
        true -> application:stop(App);
        false -> false
    end,
    try
        F()
    after
        case WasRunning of
            true -> application:start(App);
            false -> false
        end
    end.
223
%% Runs F inside global:trans/2, holding a lock on the resource
%% {mongoose_cluster_op, node()} requested by the calling process.
node_trans(F) ->
    LockId = {{mongoose_cluster_op, node()}, self()},
    global:trans(LockId, F).
Line Hits Source