./ct_report/coverage/mongoose_cluster.COVER.html

1 -module(mongoose_cluster).
2
3 %% This is a library module for cluster management: joining / leaving a cluster.
4
5 -export([join/1, leave/0, remove_from_cluster/1, is_node_alive/1]).
6
7 -export([all_cluster_nodes/0, other_cluster_nodes/0]).
8
9 -ignore_xref([all_cluster_nodes/0]).
10
11 -include("mongoose.hrl").
12
13 -dialyzer({[no_match, no_return], set_extra_db_nodes/1}).
14
15 %%
16 %% API
17 %%
18
%% @doc Join the cluster that ClusterMember already belongs to.
%% All current connections are dropped and all persistent Mnesia data
%% on this node is discarded. Use with caution!
%% On its next start the node will connect to the other members automatically.
%% The work is delegated to do_join/1 and wrapped in node_trans/1.
-spec join(node()) -> ok.
join(ClusterMember) ->
    JoinFun = fun() -> do_join(ClusterMember) end,
    node_trans(JoinFun).
26
%% Logs the intent and then, with the mongooseim application stopped,
%% checks that ClusterMember answers pings and joins its Mnesia cluster.
%% with_app_stopped/2 takes care of stopping the application beforehand and
%% starting it again afterwards (only if it was running to begin with).
do_join(ClusterMember) ->
    ?LOG_NOTICE(#{what => cluster_join,
                  text => <<"Stop mongooseim to join the cluster">>,
                  member => ClusterMember}),
    JoinSteps =
        fun() ->
                %% Raises error 'pang' when the member cannot be reached,
                %% so unsafe_join/2 is never attempted against a dead node.
                check_networking(ClusterMember),
                unsafe_join(node(), ClusterMember)
        end,
    with_app_stopped(mongooseim, JoinSteps).
36
%% @doc Leave the cluster.
%% All current connections are dropped and all persistent Mnesia data
%% on this node is discarded. Use with caution!
%% On its next start the node will NOT connect to the previous members.
%% The remaining members remove this node from the cluster Mnesia schema.
%% The work is delegated to do_leave/0 and wrapped in node_trans/1.
-spec leave() -> ok.
leave() ->
    LeaveFun = fun() -> do_leave() end,
    node_trans(LeaveFun).
45
%% Logs the intent and then, with the mongooseim application stopped:
%% stops Mnesia, asks the other db nodes to drop this node's schema copy,
%% wipes the local Mnesia directory and starts a fresh Mnesia.
do_leave() ->
    ?LOG_NOTICE(#{what => cluster_leave,
                  text => <<"Stop mongooseim to leave the cluster">>}),
    LeaveSteps =
        fun() ->
                %% Best effort: the result of stopping Mnesia is ignored.
                catch mnesia:stop(),
                Peers = mnesia_nodes(),
                detach_nodes(Peers),
                delete_mnesia(),
                ok = mnesia:start()
        end,
    with_app_stopped(mongooseim, LeaveSteps).
56
%% @doc Remove a dead node from the cluster.
%% The node being removed must be down; if it still answers pings the call
%% fails with error {node_is_alive, Node}.
%% The work is delegated to do_remove_from_cluster/1 and wrapped in node_trans/1.
-spec remove_from_cluster(node()) -> ok.
remove_from_cluster(Node) ->
    RemoveFun = fun() -> do_remove_from_cluster(Node) end,
    node_trans(RemoveFun).
62
%% Refuses to remove a node that is still reachable; otherwise removes its
%% schema copy from the cluster via remove_dead_from_cluster/1.
do_remove_from_cluster(Node) ->
    case is_node_alive(Node) of
        true ->
            error({node_is_alive, Node});
        false ->
            remove_dead_from_cluster(Node)
    end.
67
%% @doc Return this node followed by every other connected node that runs
%% the mongooseim application (see other_cluster_nodes/0).
-spec all_cluster_nodes() -> [node()].
all_cluster_nodes() ->
    Others = other_cluster_nodes(),
    [node() | Others].
71
%% @doc Return the connected nodes (as reported by nodes/0) on which the
%% mongooseim application is running. The local node is never included.
-spec other_cluster_nodes() -> [node()].
other_cluster_nodes() ->
    [Node || Node <- nodes(), is_mongooseim_node(Node)].
75
76 %%
77 %% Helpers
78 %%
79
%% Deletes DeadNode's copy of the Mnesia schema table, which removes the node
%% from the cluster. Returns ok on success and raises
%% error {del_table_copy_schema, Reason} when the Mnesia transaction aborts.
remove_dead_from_cluster(DeadNode) ->
    ?LOG_INFO(#{what => cluster_remove_dead_node_from_cluster,
                text => <<"Removing dead member node from the cluster">>,
                member => DeadNode}),
    Result = mnesia:del_table_copy(schema, DeadNode),
    case Result of
        {atomic, ok} ->
            ok;
        {aborted, Reason} ->
            error({del_table_copy_schema, Reason})
    end.
90
%% @doc Tell whether Node answers pings.
%% Returns true when check_networking/1 succeeds and false when it raises
%% an error (it raises 'pang' once all ping retries are exhausted).
-spec is_node_alive(node()) -> boolean().
is_node_alive(Node) ->
    try check_networking(Node) of
        true -> true
    catch
        error:_Reason -> false
    end.
99
%% Tells whether the mongooseim application is running on Node.
%% The list of running applications is fetched from Node with rpc:call/4.
%% When the call fails (for example the node went down after it was listed
%% by nodes/0) rpc:call/4 returns {badrpc, Reason} instead of a list; such a
%% node is reported as not being a MongooseIM node rather than crashing the
%% caller inside lists:keymember/3.
-spec is_mongooseim_node(node()) -> boolean().
is_mongooseim_node(Node) ->
    case rpc:call(Node, application, which_applications, []) of
        Apps when is_list(Apps) ->
            lists:keymember(mongooseim, 1, Apps);
        {badrpc, _Reason} ->
            false
    end.
104
%% Tells whether App is listed among the applications currently running
%% on the local node.
is_app_running(App) ->
    RunningApps = application:which_applications(),
    case lists:keyfind(App, 1, RunningApps) of
        false -> false;
        _Entry -> true
    end.
107
%% Returns true when ClusterMember answers a ping within the retry budget of
%% wait_for_pong/1; otherwise raises error 'pang' with ClusterMember recorded
%% as the failing call's argument list.
check_networking(ClusterMember) ->
    case wait_for_pong(ClusterMember) of
        ok ->
            true;
        _NoPong ->
            error(pang, [ClusterMember])
    end.
110
%% Joins Node (the local node) to the Mnesia cluster of ClusterMember.
%% "Unsafe" because the local Mnesia data is wiped first. Steps:
%%   1. delete the local Mnesia files and start a fresh Mnesia,
%%   2. point Mnesia at ClusterMember and assert that it is now a running
%%      db node,
%%   3. switch the local schema to disc_copies,
%%   4. add a local copy of every non-schema table, using the storage type
%%      the table has on ClusterMember,
%%   5. verify every copy result.
%% All copies are attempted before any result is checked.
unsafe_join(Node, ClusterMember) ->
    delete_mnesia(),
    ok = mnesia:start(),
    set_extra_db_nodes(ClusterMember),
    RunningNodes = mnesia:system_info(running_db_nodes),
    true = lists:member(ClusterMember, RunningNodes),
    ok = change_schema_type(Node),
    UserTables = [Name || Name <- mnesia:system_info(tables), Name /= schema],
    TypedTables = lists:map(fun(Name) ->
                                    {Name, table_type(ClusterMember, Name)}
                            end, UserTables),
    CopyResults = lists:map(fun({Name, Type} = Entry) ->
                                    {Entry, mnesia:add_table_copy(Name, Node, Type)}
                            end, TypedTables),
    lists:foreach(fun check_if_successful_copied/1, CopyResults),
    ok.
124
%% Tells the local Mnesia to connect to ClusterMember by setting the
%% extra_db_nodes configuration parameter. Returns ok only when Mnesia reports
%% exactly [ClusterMember] as connected; any other result raises an error map
%% carrying that result and the member name.
set_extra_db_nodes(ClusterMember) ->
    Result = mnesia:change_config(extra_db_nodes, [ClusterMember]),
    case Result of
        {ok, [ClusterMember]} ->
            ok;
        _ ->
            error(#{reason => set_extra_db_nodes_failed,
                    result => Result,
                    cluster_member => ClusterMember})
    end.
134
%% Validates one {Table, AddTableCopyResult} pair produced by unsafe_join/2.
%% A fresh copy ({atomic, ok}) and an already existing copy are both fine;
%% anything else raises error {add_table_copy_error, TableEl, TableEl}
%% (the offending element appears twice in the error term).
check_if_successful_copied({_Table, {atomic, ok}}) ->
    ok;
check_if_successful_copied({_Table, {aborted, {already_exists, _, _}}}) ->
    ok;
check_if_successful_copied(TableEl) ->
    error({add_table_copy_error, TableEl, TableEl}).
144
%% Switches the schema table on Node to disc_copies.
%% Returns ok when the change succeeds or the schema already has that type,
%% and {error, Reason} for any other aborted transaction (the caller asserts
%% on ok, so that case ends up as a badmatch there).
change_schema_type(Node) ->
    Result = mnesia:change_table_copy_type(schema, Node, disc_copies),
    case Result of
        {atomic, ok} ->
            ok;
        {aborted, {already_exists, _, _, _}} ->
            ok;
        {aborted, Reason} ->
            {error, Reason}
    end.
154
%% Returns the storage type (disc_copies | ram_copies | disc_only_copies)
%% that table T has on ClusterMember, queried remotely with rpc:call/4.
%%
%% rpc:call/4 reports failures as a {badrpc, Reason} return value rather than
%% by raising, so the result is inspected with a plain case: anything that is
%% not one of the three storage types raises
%% error {cant_get_storage_type, {T, Result}}, with both arguments of this
%% function recorded as the failing call's argument list.
table_type(ClusterMember, T) ->
    case rpc:call(ClusterMember, mnesia, table_info, [T, storage_type]) of
        Type when Type =:= disc_copies;
                  Type =:= ram_copies;
                  Type =:= disc_only_copies ->
            Type;
        Other ->
            error({cant_get_storage_type, {T, Other}}, [ClusterMember, T])
    end.
163
%% This will remove all your Mnesia data!
%% You've been warned.
%%
%% Stops Mnesia (best effort), then deletes everything under the directory
%% reported by mnesia:system_info(directory). If the 'dir' application
%% environment variable of mnesia is set to a different path, a notice is
%% logged; the directory that gets wiped is still the one from system_info.
delete_mnesia() ->
    catch mnesia:stop(),
    Dir = mnesia:system_info(directory),
    EnvDir = application:get_env(mnesia, dir, undefined),
    if
        EnvDir =:= undefined; EnvDir =:= Dir ->
            %% Either nothing is configured or both settings match, OK!
            ok;
        true ->
            ?LOG_NOTICE(#{what => mnesia_configuration,
                          text => <<"mnesia:system_info(directory) and application:get_env(mnesia, dir) "
                                    "returned different paths. mnesia_dir and env_mnesia_dir are different.">>,
                          mnesia_dir => Dir, env_mnesia_dir => EnvDir}),
            ok
    end,
    ok = rmrf(Dir),
    ?LOG_NOTICE(#{what => mnesia_deleted,
                  text => <<"Mnesia schema and files deleted.">>,
                  mnesia_dir => Dir}),
    ok.
186
%% Pings Node and, while it keeps answering 'pang', retries up to 5 more
%% times with 100 ms between attempts.
%% Returns ok as soon as a 'pong' arrives and timeout once retries run out.
wait_for_pong(Node) ->
    FirstAnswer = net_adm:ping(Node),
    wait_for_pong(FirstAnswer, Node, 5, 100).

%% Retry loop: the first argument is the answer of the most recent ping,
%% RetriesLeft is how many more pings may still be sent and Interval is the
%% pause (in milliseconds) before each of them.
wait_for_pong(Answer, Node, RetriesLeft, Interval) ->
    case {Answer, RetriesLeft} of
        {pong, _} ->
            ok;
        {pang, 0} ->
            timeout;
        {pang, _} ->
            timer:sleep(Interval),
            NextAnswer = net_adm:ping(Node),
            wait_for_pong(NextAnswer, Node, RetriesLeft - 1, Interval)
    end.
197
%% Recursively deletes the files found under Dir.
%%   * a path that does not exist is silently accepted,
%%   * a path that is not a directory is deleted as a file,
%%   * for a directory every entry is processed recursively; the directory
%%     itself is not removed.
%% Any failed deletion (or any other list_dir error) crashes the caller.
rmrf(Dir) ->
    case file:list_dir(Dir) of
        {ok, Entries} ->
            lists:foreach(fun(Entry) ->
                                  ok = rmrf(filename:join(Dir, Entry))
                          end, Entries);
        {error, enotdir} ->
            ok = file:delete(Dir);
        {error, enoent} ->
            ok
    end.
207
%% Asks every node in Nodes to delete the local node's copy of the Mnesia
%% schema, i.e. to forget this node. Crashes with badmatch if any of the
%% nodes failed to answer the multicall; the individual per-node results are
%% not inspected. Returns the multicall reply.
detach_nodes(Nodes) ->
    ThisNode = node(),
    Reply = rpc:multicall(Nodes, mnesia, del_table_copy, [schema, ThisNode]),
    {_Results, []} = Reply.
211
%% Returns the Mnesia db nodes known to this node, without the local node.
mnesia_nodes() ->
    DbNodes = mnesia:system_info(db_nodes),
    lists:delete(node(), DbNodes).
214
%% Runs F() while application App is stopped and returns F's result.
%% If App was running beforehand it is stopped first and started again
%% afterwards, even when F raises; if it was not running it is left alone.
%% The results of stopping and starting the application are ignored.
with_app_stopped(App, F) ->
    WasRunning = is_app_running(App),
    case WasRunning of
        true -> application:stop(App);
        false -> ok
    end,
    try
        F()
    after
        case WasRunning of
            true -> application:start(App);
            false -> ok
        end
    end.
223
%% Runs F under a global lock via global:trans/2. The lock's resource id
%% contains the local node name and the lock requester is the calling process.
%% Returns whatever global:trans/2 returns.
node_trans(F) ->
    LockId = {{mongoose_cluster_op, node()}, self()},
    global:trans(LockId, F).
Line Hits Source