1 |
|
-module(mongoose_cluster). |
2 |
|
|
3 |
|
%% This is a library module for cluster management: joining / leaving a cluster. |
4 |
|
|
5 |
|
%% TODO: it might make sense to expose this stuff as mod_admin_extra_cluster |
6 |
|
|
7 |
|
-export([join/1, leave/0, remove_from_cluster/1, is_node_alive/1]). |
8 |
|
|
9 |
|
-export([all_cluster_nodes/0, other_cluster_nodes/0]). |
10 |
|
|
11 |
|
-ignore_xref([all_cluster_nodes/0]). |
12 |
|
|
13 |
|
-include("mongoose.hrl"). |
14 |
|
|
15 |
|
-dialyzer({[no_match, no_return], set_extra_db_nodes/1}). |
16 |
|
|
17 |
|
%% |
18 |
|
%% API |
19 |
|
%% |
20 |
|
|
21 |
|
%% @doc Make this node a member of the cluster that ClusterMember belongs to.
%% All current connections are dropped and all persistent Mnesia data
%% on this node is discarded. Use with caution!
%% On subsequent starts the node connects to the other members automatically.
-spec join(node()) -> ok.
join(ClusterMember) ->
    %% The whole operation runs under the per-node cluster operation lock.
    JoinFun = fun() -> do_join(ClusterMember) end,
    node_trans(JoinFun).
28 |
|
|
29 |
|
%% Logs the operation, then - with the mongooseim application stopped -
%% verifies that ClusterMember answers pings and performs the actual join.
do_join(ClusterMember) ->
    ?LOG_NOTICE(#{what => cluster_join,
                  text => <<"Stop mongooseim to join the cluster">>,
                  member => ClusterMember}),
    JoinWhileStopped =
        fun() ->
            %% Fail early (error pang) when the member is unreachable,
            %% before any local Mnesia data gets deleted.
            check_networking(ClusterMember),
            unsafe_join(node(), ClusterMember)
        end,
    with_app_stopped(mongooseim, JoinWhileStopped).
38 |
|
|
39 |
|
%% @doc Take this node out of the cluster it currently belongs to.
%% All current connections are dropped and all persistent Mnesia data
%% on this node is discarded. Use with caution!
%% On subsequent starts the node will NOT connect to the previous members.
%% The remaining members remove this node from the cluster Mnesia schema.
-spec leave() -> ok.
leave() ->
    %% The whole operation runs under the per-node cluster operation lock.
    LeaveFun = fun() -> do_leave() end,
    node_trans(LeaveFun).
47 |
|
|
48 |
|
%% Logs the operation, then - with the mongooseim application stopped -
%% stops Mnesia, asks the other db nodes to drop this node's schema copy,
%% deletes the local Mnesia files and starts Mnesia again.
do_leave() ->
    ?LOG_NOTICE(#{what => cluster_leave,
                  text => <<"Stop mongooseim to leave the cluster">>}),
    LeaveWhileStopped =
        fun() ->
            catch mnesia:stop(),
            Peers = mnesia_nodes(),
            detach_nodes(Peers),
            delete_mnesia(),
            ok = mnesia:start()
        end,
    with_app_stopped(mongooseim, LeaveWhileStopped).
58 |
|
|
59 |
|
%% @doc Remove a dead node from the cluster.
%% The node being removed must be down.
-spec remove_from_cluster(node()) -> ok.
remove_from_cluster(Node) ->
    %% The whole operation runs under the per-node cluster operation lock.
    RemoveFun = fun() -> do_remove_from_cluster(Node) end,
    node_trans(RemoveFun).
64 |
|
|
65 |
|
%% Refuses (error {node_is_alive, Node}) to remove a node that still
%% answers pings; otherwise removes it from the cluster schema.
do_remove_from_cluster(Node) ->
    case is_node_alive(Node) of
        true ->
            error({node_is_alive, Node});
        false ->
            remove_dead_from_cluster(Node)
    end.
69 |
|
|
70 |
|
%% Returns this node followed by every other connected node that runs mongooseim.
-spec all_cluster_nodes() -> [node()].
all_cluster_nodes() ->
    Self = node(),
    [Self | other_cluster_nodes()].
73 |
|
|
74 |
|
%% Returns the connected nodes (as listed by nodes/0) on which the
%% mongooseim application is running.
-spec other_cluster_nodes() -> [node()].
other_cluster_nodes() ->
    [Node || Node <- nodes(), is_mongooseim_node(Node)].
77 |
|
|
78 |
|
%% |
79 |
|
%% Helpers |
80 |
|
%% |
81 |
|
|
82 |
|
%% Deletes DeadNode's copy of the Mnesia schema table.
%% Returns ok on success; raises error({del_table_copy_schema, Reason})
%% when Mnesia aborts the operation.
remove_dead_from_cluster(DeadNode) ->
    ?LOG_INFO(#{what => cluster_remove_dead_node_from_cluster,
                text => <<"Removing dead member node from the cluster">>,
                member => DeadNode}),
    Result = mnesia:del_table_copy(schema, DeadNode),
    case Result of
        {aborted, Reason} ->
            error({del_table_copy_schema, Reason});
        {atomic, ok} ->
            ok
    end.
92 |
|
|
93 |
|
%% @doc Tell whether Node answers pings.
%% Any error raised by the connectivity check (including the pang error
%% raised when the node never answers) is reported as false.
-spec is_node_alive(node()) -> boolean().
is_node_alive(Node) ->
    try
        %% check_networking/1 either returns true or raises an error.
        check_networking(Node)
    catch
        error:_Reason ->
            false
    end.
101 |
|
|
102 |
|
%% Returns true when the mongooseim application is running on Node.
%% rpc:call/4 returns {badrpc, Reason} instead of an application list when
%% the node is unreachable or the remote call fails. Such a node is reported
%% as not being a MongooseIM node, instead of crashing in lists:keymember/3
%% (which would take down every caller that filters nodes() with this
%% predicate whenever a single peer disappears).
-spec is_mongooseim_node(node()) -> boolean().
is_mongooseim_node(Node) ->
    case rpc:call(Node, application, which_applications, []) of
        Apps when is_list(Apps) ->
            lists:keymember(mongooseim, 1, Apps);
        {badrpc, _Reason} ->
            false
    end.
106 |
|
|
107 |
|
%% Returns true when App is among the applications currently running
%% on the local node.
is_app_running(App) ->
    Started = application:which_applications(),
    false =/= lists:keyfind(App, 1, Started).
109 |
|
|
110 |
|
%% Returns true when ClusterMember answers a ping within the retry budget
%% of wait_for_pong/1; raises error(pang) otherwise.
check_networking(ClusterMember) ->
    case wait_for_pong(ClusterMember) of
        ok ->
            true;
        _NoPong ->
            error(pang, [ClusterMember])
    end.
112 |
|
|
113 |
|
%% Performs the actual join of Node to the Mnesia cluster of ClusterMember.
%% "Unsafe" because it starts by deleting the local Mnesia files.
%% Steps, in order:
%%   1. delete local Mnesia data and start Mnesia afresh,
%%   2. point Mnesia at ClusterMember and assert it shows up as running,
%%   3. switch the local schema copy to disc_copies,
%%   4. look up the storage type each non-schema table has on ClusterMember,
%%   5. add a local copy of each table with that storage type,
%%   6. verify every copy attempt succeeded (or the copy already existed).
unsafe_join(Node, ClusterMember) ->
    delete_mnesia(),
    ok = mnesia:start(),
    set_extra_db_nodes(ClusterMember),
    RunningNodes = mnesia:system_info(running_db_nodes),
    true = lists:member(ClusterMember, RunningNodes),
    ok = change_schema_type(Node),
    %% All storage types are resolved before the first copy is attempted.
    TypedTables = [ {Tab, table_type(ClusterMember, Tab)}
                    || Tab <- mnesia:system_info(tables),
                       Tab /= schema ],
    CopyResults = [ {Entry, mnesia:add_table_copy(Tab, Node, StorageType)}
                    || {Tab, StorageType} = Entry <- TypedTables ],
    lists:foreach(fun check_if_successful_copied/1, CopyResults),
    ok.
126 |
|
|
127 |
|
%% Sets ClusterMember as Mnesia's only extra_db_nodes entry.
%% Returns ok when Mnesia reports exactly that node back; any other result
%% raises an error carrying the result and the requested member.
set_extra_db_nodes(ClusterMember) ->
    Result = mnesia:change_config(extra_db_nodes, [ClusterMember]),
    case Result of
        {ok, [ClusterMember]} ->
            ok;
        _ ->
            error(#{reason => set_extra_db_nodes_failed,
                    result => Result,
                    cluster_member => ClusterMember})
    end.
136 |
|
|
137 |
|
%% Validates one {TableEntry, AddTableCopyResult} pair.
%% A fresh copy ({atomic, ok}) and an already existing copy are both fine;
%% anything else raises error({add_table_copy_error, Pair, Pair}).
check_if_successful_copied(TableEl) ->
    {_Entry, Outcome} = Accepted = TableEl,
    case Outcome of
        {atomic, ok} when Accepted =:= TableEl ->
            ok;
        {aborted, {already_exists, _, _}} ->
            ok;
        _ ->
            error({add_table_copy_error, TableEl, TableEl})
    end.
146 |
|
|
147 |
|
%% Switches Node's copy of the schema table to disc_copies.
%% Returns ok when the change succeeds or the copy already has that type,
%% and {error, Reason} when Mnesia aborts for any other reason.
change_schema_type(Node) ->
    Result = mnesia:change_table_copy_type(schema, Node, disc_copies),
    case Result of
        {aborted, {already_exists, _, _, _}} ->
            ok;
        {aborted, Reason} ->
            {error, Reason};
        {atomic, ok} ->
            ok
    end.
156 |
|
|
157 |
|
%% Returns the storage type (disc_copies | ram_copies | disc_only_copies)
%% that table T has on ClusterMember.
%% Raises error({cant_get_storage_type, {T, Class, Reason}}) when the type
%% cannot be determined.
%%
%% rpc:call/4 reports failures as a {badrpc, Reason} return value, not as an
%% exception, and Mnesia may report a storage type outside the three above.
%% Such results are matched explicitly so that the error names the table,
%% instead of surfacing as a bare try_clause error from the unprotected
%% `of' section.
table_type(ClusterMember, T) ->
    try rpc:call(ClusterMember, mnesia, table_info, [T, storage_type]) of
        Type when Type =:= disc_copies;
                  Type =:= ram_copies;
                  Type =:= disc_only_copies ->
            Type;
        Other ->
            error({cant_get_storage_type, {T, unexpected_result, Other}},
                  [ClusterMember, T])
    catch
        %% Only exceptions raised by the rpc:call/4 expression itself land here.
        E:R ->
            error({cant_get_storage_type, {T, E, R}}, [ClusterMember, T])
    end.
165 |
|
|
166 |
|
%% WARNING: this removes all your Mnesia data!
%% Stops Mnesia and deletes every file under mnesia:system_info(directory).
%% When the mnesia application's `dir' env variable is set to a different
%% path, a notice is logged; the directory reported by Mnesia is the one
%% that gets emptied either way.
delete_mnesia() ->
    catch mnesia:stop(),
    Dir = mnesia:system_info(directory),
    case application:get_env(mnesia, dir, undefined) of
        EnvDir when EnvDir =:= undefined; EnvDir =:= Dir ->
            %% Either nothing is configured or both settings match, OK!
            ok;
        EnvDir ->
            ?LOG_NOTICE(#{what => mnesia_configuration,
                          text => <<"mnesia:system_info(directory) and application:get_env(mnesia, dir) "
                                    "returned different paths. mnesia_dir and env_mnesia_dir are different.">>,
                          mnesia_dir => Dir, env_mnesia_dir => EnvDir})
    end,
    ok = rmrf(Dir),
    ?LOG_NOTICE(#{what => mnesia_deleted,
                  text => <<"Mnesia schema and files deleted.">>,
                  mnesia_dir => Dir}),
    ok.
188 |
|
|
189 |
|
%% Pings Node, retrying up to 5 more times with 100 ms pauses.
%% Returns ok as soon as the node answers, timeout otherwise.
wait_for_pong(Node) ->
    FirstReply = net_adm:ping(Node),
    wait_for_pong(FirstReply, Node, 5, 100).
191 |
|
|
192 |
|
%% Retry loop behind wait_for_pong/1.
%% The first argument is the result of the latest ping; Retries is the
%% number of further pings still allowed, Interval the pause (ms) before each.
wait_for_pong(pong, _, _, _) ->
    ok;
wait_for_pong(pang, Node, RetriesLeft, Interval) when RetriesLeft =/= 0 ->
    timer:sleep(Interval),
    NextReply = net_adm:ping(Node),
    wait_for_pong(NextReply, Node, RetriesLeft - 1, Interval);
wait_for_pong(pang, _, 0, _) ->
    timeout.
199 |
|
|
200 |
|
%% Recursively deletes every file below Dir.
%% Directories themselves (including Dir) are left in place; a path that
%% does not exist is treated as already clean. Returns ok.
rmrf(Dir) ->
    case file:list_dir(Dir) of
        {ok, Entries} ->
            Remove = fun(Entry) -> ok = rmrf(filename:join(Dir, Entry)) end,
            lists:foreach(Remove, Entries);
        {error, enotdir} ->
            %% Not a directory, so it is a plain file: delete it.
            ok = file:delete(Dir);
        {error, enoent} ->
            ok
    end.
209 |
|
|
210 |
|
%% Asks every node in Nodes to delete this node's copy of the Mnesia schema.
%% Crashes (badmatch) when any of the nodes could not be reached; the
%% individual replies are not inspected.
detach_nodes(Nodes) ->
    Args = [schema, node()],
    {_Replies, []} = rpc:multicall(Nodes, mnesia, del_table_copy, Args).
213 |
|
|
214 |
|
%% Returns the Mnesia db_nodes other than the local node.
mnesia_nodes() ->
    DbNodes = mnesia:system_info(db_nodes),
    lists:delete(node(), DbNodes).
216 |
|
|
217 |
|
%% Runs F() while application App is stopped and returns F's result.
%% When App was running it is stopped first and started again afterwards,
%% even if F raises. When App was not running, F is simply called.
with_app_stopped(App, F) ->
    case is_app_running(App) of
        true ->
            application:stop(App),
            try
                F()
            after
                application:start(App)
            end;
        false ->
            F()
    end.
225 |
|
|
226 |
|
%% Runs F under a global lock keyed on {mongoose_cluster_op, node()},
%% requested on behalf of the calling process.
node_trans(F) ->
    LockId = {{mongoose_cluster_op, node()}, self()},
    global:trans(LockId, F).