1 |
|
-module(mongoose_cluster). |
2 |
|
|
3 |
|
%% This is a library module for cluster management: joining / leaving a cluster. |
4 |
|
|
5 |
|
-export([join/1, leave/0, remove_from_cluster/1, is_node_alive/1]). |
6 |
|
|
7 |
|
-export([all_cluster_nodes/0, other_cluster_nodes/0]). |
8 |
|
|
9 |
|
-ignore_xref([all_cluster_nodes/0]). |
10 |
|
|
11 |
|
-include("mongoose.hrl"). |
12 |
|
|
13 |
|
-dialyzer({[no_match, no_return], set_extra_db_nodes/1}). |
14 |
|
|
15 |
|
%% |
16 |
|
%% API |
17 |
|
%% |
18 |
|
|
19 |
|
%% @doc Make this node a member of the cluster that ClusterMember belongs to.
%% All current connections are dropped and all persistent Mnesia data on
%% this node is discarded. Use with caution!
%% On its next start the node connects to the other members automatically.
-spec join(node()) -> ok.
join(ClusterMember) ->
    JoinOp = fun() -> do_join(ClusterMember) end,
    node_trans(JoinOp).
26 |
|
|
27 |
|
%% Logs the join attempt, then - with the mongooseim application stopped -
%% verifies that ClusterMember is reachable and joins its Mnesia cluster.
do_join(ClusterMember) ->
    ?LOG_NOTICE(#{what => cluster_join,
                  text => <<"Stop mongooseim to join the cluster">>,
                  member => ClusterMember}),
    JoinWhileStopped =
        fun() ->
                %% Raises if the member does not answer pings.
                check_networking(ClusterMember),
                unsafe_join(node(), ClusterMember)
        end,
    with_app_stopped(mongooseim, JoinWhileStopped).
36 |
|
|
37 |
|
%% @doc Take this node out of its cluster.
%% All current connections are dropped and all persistent Mnesia data on
%% this node is discarded. Use with caution!
%% On its next start the node will NOT connect to the previous members.
%% The remaining members remove this node from the cluster Mnesia schema.
-spec leave() -> ok.
leave() ->
    node_trans(fun do_leave/0).
45 |
|
|
46 |
|
%% Logs the leave attempt, then - with the mongooseim application stopped -
%% stops Mnesia, asks the other db nodes to drop this node's schema copy,
%% wipes the local Mnesia directory and starts Mnesia again.
do_leave() ->
    ?LOG_NOTICE(#{what => cluster_leave,
                  text => <<"Stop mongooseim to leave the cluster">>}),
    LeaveWhileStopped =
        fun() ->
                %% Best effort: the outcome of stopping Mnesia is ignored.
                catch mnesia:stop(),
                OtherDbNodes = mnesia_nodes(),
                detach_nodes(OtherDbNodes),
                delete_mnesia(),
                ok = mnesia:start()
        end,
    with_app_stopped(mongooseim, LeaveWhileStopped).
56 |
|
|
57 |
|
%% @doc Remove a dead node from the cluster.
%% The node being removed must be down; if it still answers pings the call
%% fails with a `{node_is_alive, Node}' error.
-spec remove_from_cluster(node()) -> ok.
remove_from_cluster(Node) ->
    RemoveOp = fun() -> do_remove_from_cluster(Node) end,
    node_trans(RemoveOp).
62 |
|
|
63 |
|
%% Refuses to remove a node that is still reachable; otherwise deletes its
%% schema copy from the cluster.
do_remove_from_cluster(Node) ->
    case is_node_alive(Node) of
        true ->
            error({node_is_alive, Node});
        false ->
            remove_dead_from_cluster(Node)
    end.
67 |
|
|
68 |
|
%% Returns the local node followed by every connected node that runs mongooseim.
-spec all_cluster_nodes() -> [node()].
all_cluster_nodes() ->
    Self = node(),
    Others = other_cluster_nodes(),
    [Self | Others].
71 |
|
|
72 |
|
%% Returns the connected nodes (as listed by nodes/0) that run mongooseim.
-spec other_cluster_nodes() -> [node()].
other_cluster_nodes() ->
    [Node || Node <- nodes(), is_mongooseim_node(Node)].
75 |
|
|
76 |
|
%% |
77 |
|
%% Helpers |
78 |
|
%% |
79 |
|
|
80 |
|
%% Deletes DeadNode's copy of the Mnesia schema table.
%% Returns ok on success; raises `{del_table_copy_schema, Reason}' when the
%% Mnesia operation is aborted.
remove_dead_from_cluster(DeadNode) ->
    ?LOG_INFO(#{what => cluster_remove_dead_node_from_cluster,
                text => <<"Removing dead member node from the cluster">>,
                member => DeadNode}),
    Outcome = mnesia:del_table_copy(schema, DeadNode),
    case Outcome of
        {aborted, Reason} ->
            error({del_table_copy_schema, Reason});
        {atomic, ok} ->
            ok
    end.
90 |
|
|
91 |
|
%% @doc Tell whether Node currently answers pings.
%% Returns true when check_networking/1 succeeds and false when it raises
%% an error (which it does once all ping retries have failed).
-spec is_node_alive(node()) -> boolean().
is_node_alive(Node) ->
    try check_networking(Node) of
        true ->
            true
    catch
        error:_ ->
            false
    end.
99 |
|
|
100 |
|
%% Tells whether the mongooseim application is running on Node.
%% The list of running applications is fetched with an RPC; when the RPC
%% itself fails (for example the node went down after it was listed) the
%% node is reported as not running mongooseim instead of crashing the caller.
-spec is_mongooseim_node(node()) -> boolean().
is_mongooseim_node(Node) ->
    case rpc:call(Node, application, which_applications, []) of
        Apps when is_list(Apps) ->
            lists:keymember(mongooseim, 1, Apps);
        {badrpc, _Reason} ->
            false
    end.
104 |
|
|
105 |
|
%% Tells whether App is among the applications currently running on this node.
is_app_running(App) ->
    RunningApps = application:which_applications(),
    case lists:keyfind(App, 1, RunningApps) of
        false -> false;
        _AppEntry -> true
    end.
107 |
|
|
108 |
|
%% Returns true when ClusterMember answers a ping within the retry window of
%% wait_for_pong/1; otherwise raises a `pang' error.
check_networking(ClusterMember) ->
    case wait_for_pong(ClusterMember) of
        ok ->
            true;
        _NoPong ->
            error(pang, [ClusterMember])
    end.
110 |
|
|
111 |
|
%% Joins Node to the Mnesia cluster of ClusterMember.
%% Steps, in order: wipe the local Mnesia directory, start Mnesia, register
%% ClusterMember as an extra db node and assert that it is running, switch the
%% schema copy on Node to disc_copies, then add a copy of every non-schema
%% table to Node using the storage type reported by ClusterMember.
%% Every step asserts success, so a failure raises.
unsafe_join(Node, ClusterMember) ->
    delete_mnesia(),
    ok = mnesia:start(),
    set_extra_db_nodes(ClusterMember),
    RunningDbNodes = mnesia:system_info(running_db_nodes),
    true = lists:member(ClusterMember, RunningDbNodes),
    ok = change_schema_type(Node),
    %% Phase 1: look up the storage type of every table before copying anything.
    TypedTables = [ {Tab, table_type(ClusterMember, Tab)}
                    || Tab <- mnesia:system_info(tables),
                       Tab /= schema ],
    %% Phase 2: request all copies, collecting the results.
    CopyResults = [ {Entry, mnesia:add_table_copy(Tab, Node, StorageType)}
                    || {Tab, StorageType} = Entry <- TypedTables ],
    %% Phase 3: only now fail on the first unsuccessful copy.
    lists:foreach(fun check_if_successful_copied/1, CopyResults),
    ok.
124 |
|
|
125 |
|
%% Sets ClusterMember as Mnesia's only extra db node.
%% Returns ok when Mnesia reports exactly that node back; any other result
%% raises an error map describing the failure.
set_extra_db_nodes(ClusterMember) ->
    Result = mnesia:change_config(extra_db_nodes, [ClusterMember]),
    case Result of
        {ok, [ClusterMember]} ->
            ok;
        _ ->
            error(#{reason => set_extra_db_nodes_failed,
                    result => Result,
                    cluster_member => ClusterMember})
    end.
134 |
|
|
135 |
|
%% Validates one `{Table, AddTableCopyResult}' pair.
%% A successful copy and an "already exists" abort both count as success;
%% anything else raises `{add_table_copy_error, TableEl, TableEl}'.
check_if_successful_copied({_, {atomic, ok}}) ->
    ok;
check_if_successful_copied({_, {aborted, {already_exists, _, _}}}) ->
    ok;
check_if_successful_copied(TableEl) ->
    error({add_table_copy_error, TableEl, TableEl}).
144 |
|
|
145 |
|
%% Switches the schema table copy on Node to disc_copies.
%% Returns ok on success or when Mnesia aborts with an `already_exists'
%% reason, and `{error, Reason}' for any other abort.
change_schema_type(Node) ->
    Outcome = mnesia:change_table_copy_type(schema, Node, disc_copies),
    case Outcome of
        {atomic, ok} ->
            ok;
        {aborted, {already_exists, _, _, _}} ->
            ok;
        {aborted, Reason} ->
            {error, Reason}
    end.
154 |
|
|
155 |
|
%% Asks ClusterMember which storage type it uses for table T.
%% Returns disc_copies, ram_copies or disc_only_copies.
%% rpc:call/4 reports failures by returning `{badrpc, Reason}' rather than by
%% raising, so the reply is matched explicitly: any reply that is not one of
%% the three storage types raises
%% `{cant_get_storage_type, {T, unexpected_result, Reply}}'.
table_type(ClusterMember, T) ->
    case rpc:call(ClusterMember, mnesia, table_info, [T, storage_type]) of
        Type when Type =:= disc_copies;
                  Type =:= ram_copies;
                  Type =:= disc_only_copies ->
            Type;
        Other ->
            error({cant_get_storage_type, {T, unexpected_result, Other}},
                  [ClusterMember, T])
    end.
163 |
|
|
164 |
|
%% Stops Mnesia and deletes everything inside its data directory.
%% This will remove all your Mnesia data! You've been warned.
%% The directory comes from mnesia:system_info(directory); if the `dir'
%% application env of mnesia is set to a different path, a notice is logged
%% and the deletion still proceeds on the system_info directory.
delete_mnesia() ->
    %% Best effort: the outcome of stopping Mnesia is ignored.
    catch mnesia:stop(),
    Dir = mnesia:system_info(directory),
    case application:get_env(mnesia, dir, undefined) of
        EnvDir when EnvDir =:= undefined; EnvDir =:= Dir ->
            %% Either not configured, or both settings match.
            ok;
        EnvDir ->
            ?LOG_NOTICE(#{what => mnesia_configuration,
                          text => <<"mnesia:system_info(directory) and application:get_env(mnesia, dir) "
                                    "returned different paths. mnesia_dir and env_mnesia_dir are different.">>,
                          mnesia_dir => Dir, env_mnesia_dir => EnvDir})
    end,
    ok = rmrf(Dir),
    ?LOG_NOTICE(#{what => mnesia_deleted,
                  text => <<"Mnesia schema and files deleted.">>,
                  mnesia_dir => Dir}),
    ok.
186 |
|
|
187 |
|
%% Pings Node, retrying up to 5 more times with a 100 ms pause between
%% attempts. Returns ok on the first pong, timeout otherwise.
wait_for_pong(Node) ->
    FirstReply = net_adm:ping(Node),
    wait_for_pong(FirstReply, Node, 5, 100).
189 |
|
|
190 |
|
%% Retry loop behind wait_for_pong/1.
%% The first argument is the result of the latest ping: pong ends the loop
%% with ok; pang either gives up with timeout (no retries left) or sleeps
%% Interval milliseconds and pings Node again with one retry fewer.
wait_for_pong(pong, _Node, _Retries, _Interval) ->
    ok;
wait_for_pong(pang, Node, Retries, Interval) ->
    case Retries of
        0 ->
            timeout;
        _ ->
            timer:sleep(Interval),
            NextReply = net_adm:ping(Node),
            wait_for_pong(NextReply, Node, Retries - 1, Interval)
    end.
197 |
|
|
198 |
|
%% Recursively deletes the files below Path.
%% A missing path is fine, a plain file is deleted, and a directory has each
%% of its entries processed recursively (the directory itself is not removed).
%% Any failed deletion raises a badmatch.
rmrf(Path) ->
    case file:list_dir(Path) of
        {ok, Entries} ->
            DeleteEntry = fun(Entry) -> ok = rmrf(filename:join(Path, Entry)) end,
            lists:foreach(DeleteEntry, Entries),
            ok;
        {error, enotdir} ->
            %% Not a directory, so treat it as a file.
            ok = file:delete(Path);
        {error, enoent} ->
            ok
    end.
207 |
|
|
208 |
|
%% Asks every node in Nodes to delete this node's copy of the Mnesia schema.
%% The per-node replies are not inspected, but the call asserts that no node
%% was reported as bad by rpc:multicall/4.
detach_nodes(Nodes) ->
    {_Replies, []} = rpc:multicall(Nodes, mnesia, del_table_copy, [schema, node()]).
211 |
|
|
212 |
|
%% Returns the Mnesia db nodes other than the local node.
mnesia_nodes() ->
    DbNodes = mnesia:system_info(db_nodes),
    lists:delete(node(), DbNodes).
214 |
|
|
215 |
|
%% Runs F() while application App is stopped and returns its result.
%% If App was running it is stopped first and started again afterwards, even
%% when F raises; if it was not running, F is simply called.
with_app_stopped(App, F) ->
    case is_app_running(App) of
        true ->
            application:stop(App),
            try
                F()
            after
                application:start(App)
            end;
        false ->
            F()
    end.
223 |
|
|
224 |
|
%% Runs F inside global:trans/2, using `{mongoose_cluster_op, node()}' as the
%% lock resource and the calling process as the lock requester.
node_trans(F) ->
    LockId = {{mongoose_cluster_op, node()}, self()},
    global:trans(LockId, F).