./ct_report/coverage/mongoose_cluster.COVER.html

1 -module(mongoose_cluster).
2
3 %% This is a library module for cluster management: joining / leaving a cluster.
4
5 -export([join/1, leave/0, remove_from_cluster/1, is_node_alive/1]).
6
7 -export([all_cluster_nodes/0, other_cluster_nodes/0]).
8
9 -ignore_xref([all_cluster_nodes/0]).
10
11 -include("mongoose.hrl").
12
13 -dialyzer({[no_match, no_return], set_extra_db_nodes/1}).
14
15 %%
16 %% API
17 %%
18
%% @doc Join the cluster that ClusterMember belongs to.
%% All current connections are dropped and all persistent Mnesia data
%% on this node is discarded. Use with caution!
%% Next time the node starts, it will connect to other members automatically.
%% The operation runs inside the per-node cluster-operation lock.
-spec join(node()) -> ok.
join(ClusterMember) ->
    JoinFun = fun() -> do_join(ClusterMember) end,
    node_trans(JoinFun).
26
%% Logs the intent, then - with the mongooseim application stopped for the
%% duration - checks that ClusterMember is reachable and joins its cluster.
do_join(ClusterMember) ->
    ?LOG_NOTICE(#{what => cluster_join,
                  text => <<"Stop mongooseim to join the cluster">>,
                  member => ClusterMember}),
    JoinWhileStopped =
        fun() ->
            check_networking(ClusterMember),
            unsafe_join(node(), ClusterMember)
        end,
    with_app_stopped(mongooseim, JoinWhileStopped).
36
%% @doc Leave the cluster.
%% All current connections are dropped and all persistent Mnesia data
%% on this node is discarded. Use with caution!
%% Next time the node starts, it will NOT connect to previous members.
%% Remaining members will remove this node from the cluster Mnesia schema.
%% The operation runs inside the per-node cluster-operation lock.
-spec leave() -> ok.
leave() ->
    node_trans(fun do_leave/0).
45
%% Logs the intent, then - with the mongooseim application stopped for the
%% duration - stops Mnesia, asks the other db nodes to drop this node's
%% schema copy, wipes the local Mnesia files and starts Mnesia again.
do_leave() ->
    ?LOG_NOTICE(#{what => cluster_leave,
                  text => <<"Stop mongooseim to leave the cluster">>}),
    LeaveWhileStopped =
        fun() ->
            %% Best effort: a failure to stop Mnesia is deliberately ignored.
            catch mnesia:stop(),
            OtherDbNodes = mnesia_nodes(),
            detach_nodes(OtherDbNodes),
            delete_mnesia(),
            ok = mnesia:start()
        end,
    with_app_stopped(mongooseim, LeaveWhileStopped).
56
%% @doc Remove a dead node from the cluster.
%% The node being removed must be down; otherwise the call fails with
%% `{node_is_alive, Node}'.
%% The operation runs inside the per-node cluster-operation lock.
-spec remove_from_cluster(node()) -> ok.
remove_from_cluster(Node) ->
    RemoveFun = fun() -> do_remove_from_cluster(Node) end,
    node_trans(RemoveFun).
62
%% Refuses to remove a node that still answers pings; otherwise drops the
%% node's schema copy from the cluster.
do_remove_from_cluster(Node) ->
    case is_node_alive(Node) of
        true ->
            error({node_is_alive, Node});
        false ->
            remove_dead_from_cluster(Node)
    end.
67
%% Returns this node followed by every connected node that runs mongooseim.
-spec all_cluster_nodes() -> [node()].
all_cluster_nodes() ->
    Others = other_cluster_nodes(),
    [node() | Others].
71
%% Returns the connected nodes (as reported by nodes/0) that run mongooseim.
-spec other_cluster_nodes() -> [node()].
other_cluster_nodes() ->
    [Node || Node <- nodes(), is_mongooseim_node(Node)].
75
76 %%
77 %% Helpers
78 %%
79
%% Deletes DeadNode's copy of the Mnesia schema table.
%% Returns ok, or raises `{del_table_copy_schema, Reason}' when Mnesia
%% aborts the operation.
remove_dead_from_cluster(DeadNode) ->
    ?LOG_INFO(#{what => cluster_remove_dead_node_from_cluster,
                text => <<"Removing dead member node from the cluster">>,
                member => DeadNode}),
    Result = mnesia:del_table_copy(schema, DeadNode),
    case Result of
        {aborted, Reason} ->
            error({del_table_copy_schema, Reason});
        {atomic, ok} ->
            ok
    end.
90
%% @doc Tell whether Node answers pings.
%% Returns true when check_networking/1 succeeds and false when it raises
%% an error (for example after all ping retries returned pang).
-spec is_node_alive(node()) -> boolean().
is_node_alive(Node) ->
    %% Only the check_networking/1 call is protected; the `of' section is not.
    try check_networking(Node) of
        true ->
            true
    catch
        error:_ ->
            false
    end.
99
%% Tells whether the mongooseim application is running on Node.
%% rpc:call/4 returns `{badrpc, Reason}' instead of a list when the node
%% cannot be reached or the call fails; such a node is reported as not
%% being a MongooseIM node rather than crashing the caller.
-spec is_mongooseim_node(node()) -> boolean().
is_mongooseim_node(Node) ->
    case rpc:call(Node, application, which_applications, []) of
        Apps when is_list(Apps) ->
            lists:keymember(mongooseim, 1, Apps);
        {badrpc, _Reason} ->
            false
    end.
104
%% Tells whether App is among the applications currently running locally.
is_app_running(App) ->
    RunningApps = application:which_applications(),
    lists:keyfind(App, 1, RunningApps) =/= false.
107
%% Returns true once ClusterMember answers a ping; raises error `pang'
%% when wait_for_pong/1 gives up.
check_networking(ClusterMember) ->
    case wait_for_pong(ClusterMember) of
        ok ->
            true;
        _NoPong ->
            error(pang, [ClusterMember])
    end.
110
%% Wipes the local Mnesia data, restarts Mnesia, connects it to
%% ClusterMember and adds a local copy of every non-schema table, using the
%% storage type each table has on ClusterMember.
%% Every add_table_copy call is issued first; the results are validated
%% only afterwards.
unsafe_join(Node, ClusterMember) ->
    delete_mnesia(),
    ok = mnesia:start(),
    set_extra_db_nodes(ClusterMember),
    RunningDbNodes = mnesia:system_info(running_db_nodes),
    true = lists:member(ClusterMember, RunningDbNodes),
    ok = change_schema_type(Node),
    UserTables = [T || T <- mnesia:system_info(tables), T /= schema],
    TypedTables = lists:map(fun(T) -> {T, table_type(ClusterMember, T)} end,
                            UserTables),
    CopyResults = lists:map(fun({T, Type} = Table) ->
                                    {Table, mnesia:add_table_copy(T, Node, Type)}
                            end,
                            TypedTables),
    lists:foreach(fun check_if_successful_copied/1, CopyResults),
    ok.
124
%% Points Mnesia's extra_db_nodes at ClusterMember.
%% Anything other than `{ok, [ClusterMember]}' raises an error map
%% describing the unexpected result.
set_extra_db_nodes(ClusterMember) ->
    Expected = {ok, [ClusterMember]},
    case mnesia:change_config(extra_db_nodes, [ClusterMember]) of
        Expected ->
            ok;
        Unexpected ->
            error(#{reason => set_extra_db_nodes_failed,
                    result => Unexpected,
                    cluster_member => ClusterMember})
    end.
134
%% Validates one `{Table, AddTableCopyResult}' pair.
%% A successful copy and an already existing copy are both fine; anything
%% else raises `{add_table_copy_error, TableEl, TableEl}'.
check_if_successful_copied({_, {atomic, ok}}) ->
    ok;
check_if_successful_copied({_, {aborted, {already_exists, _, _}}}) ->
    ok;
check_if_successful_copied(TableEl) ->
    error({add_table_copy_error, TableEl, TableEl}).
144
%% Switches Node's schema table copy to disc_copies.
%% Returns ok on success or when Mnesia reports already_exists, and
%% `{error, Reason}' for any other abort.
change_schema_type(Node) ->
    Result = mnesia:change_table_copy_type(schema, Node, disc_copies),
    case Result of
        {atomic, ok} ->
            ok;
        {aborted, {already_exists, _, _, _}} ->
            ok;
        {aborted, Reason} ->
            {error, Reason}
    end.
154
%% Asks ClusterMember which storage type it uses for table T.
%% Returns disc_copies, ram_copies or disc_only_copies.
%% Every failure raises `{cant_get_storage_type, {T, Kind, Details}}':
%%   * `badrpc' - rpc:call/4 reported `{badrpc, Details}' (node down,
%%     remote call failed, ...); rpc signals these by return value, not
%%     by raising, so they must be matched explicitly;
%%   * `unexpected_result' - the remote call returned something that is not
%%     one of the three storage types;
%%   * an exception class - the local rpc:call/4 invocation itself raised.
table_type(ClusterMember, T) ->
    try rpc:call(ClusterMember, mnesia, table_info, [T, storage_type]) of
        Type when Type =:= disc_copies;
                  Type =:= ram_copies;
                  Type =:= disc_only_copies ->
            Type;
        {badrpc, Reason} ->
            error({cant_get_storage_type, {T, badrpc, Reason}},
                  [ClusterMember, T]);
        Other ->
            error({cant_get_storage_type, {T, unexpected_result, Other}},
                  [ClusterMember, T])
    catch
        E:R ->
            error({cant_get_storage_type, {T, E, R}}, [ClusterMember, T])
    end.
163
%% Warning: this removes all your Mnesia data!
%% Stops Mnesia (best effort), then deletes the files under the directory
%% Mnesia reports. If the `dir' application environment value is set and
%% differs from that directory, the mismatch is logged but nothing else
%% changes.
delete_mnesia() ->
    catch mnesia:stop(),
    MnesiaDir = mnesia:system_info(directory),
    case application:get_env(mnesia, dir, undefined) of
        EnvDir when EnvDir =:= undefined; EnvDir =:= MnesiaDir ->
            %% Either not configured or both settings match.
            ok;
        EnvDir ->
            ?LOG_NOTICE(#{what => mnesia_configuration,
                          text => <<"mnesia:system_info(directory) and application:get_env(mnesia, dir) "
                                    "returned different paths. mnesia_dir and env_mnesia_dir are different.">>,
                          mnesia_dir => MnesiaDir, env_mnesia_dir => EnvDir}),
            ok
    end,
    ok = rmrf(MnesiaDir),
    ?LOG_NOTICE(#{what => mnesia_deleted,
                  text => <<"Mnesia schema and files deleted.">>,
                  mnesia_dir => MnesiaDir}),
    ok.
186
%% Pings Node, retrying up to 5 more times with 100 ms between attempts.
%% Returns ok as soon as a pong arrives, or timeout once retries run out.
wait_for_pong(Node) ->
    FirstPing = net_adm:ping(Node),
    wait_for_pong(FirstPing, Node, 5, 100).

%% Retry loop: PingResult is the outcome of the latest ping, Retries the
%% number of attempts still allowed, Interval the pause in milliseconds.
wait_for_pong(PingResult, Node, Retries, Interval) ->
    case {PingResult, Retries} of
        {pong, _} ->
            ok;
        {pang, 0} ->
            timeout;
        {pang, _} ->
            timer:sleep(Interval),
            NextPing = net_adm:ping(Node),
            wait_for_pong(NextPing, Node, Retries - 1, Interval)
    end.
197
%% Recursively deletes the regular files under Path.
%% A missing path is fine; a path that is not a directory is deleted as a
%% file; a directory has each of its entries processed in turn (the
%% directories themselves are left in place).
rmrf(Path) ->
    case file:list_dir(Path) of
        {ok, Entries} ->
            lists:foreach(fun(Entry) ->
                                  ok = rmrf(filename:join(Path, Entry))
                          end,
                          Entries);
        {error, enotdir} ->
            ok = file:delete(Path);
        {error, enoent} ->
            ok
    end.
207
%% Asks every node in Nodes to delete this node's copy of the Mnesia schema.
%% Crashes with badmatch if rpc:multicall/4 reports any bad nodes.
detach_nodes(Nodes) ->
    {_Replies, []} = rpc:multicall(Nodes, mnesia, del_table_copy,
                                   [schema, node()]).
211
%% Returns the Mnesia db nodes other than this node.
mnesia_nodes() ->
    DbNodes = mnesia:system_info(db_nodes),
    lists:delete(node(), DbNodes).
214
%% Runs F() with application App stopped.
%% If App was running it is stopped beforehand and started again afterwards,
%% even when F() raises; if it was not running, F() is simply called.
%% Returns whatever F() returns.
with_app_stopped(App, F) ->
    case is_app_running(App) of
        true ->
            application:stop(App),
            try
                F()
            after
                application:start(App)
            end;
        false ->
            F()
    end.
223
%% Runs F() while holding a global lock whose resource id is
%% `{mongoose_cluster_op, node()}' and whose requester is the calling
%% process. Returns the result of global:trans/2.
node_trans(F) ->
    LockId = {{mongoose_cluster_op, node()}, self()},
    global:trans(LockId, F).
Line Hits Source