%%==============================================================================
%% Copyright 2020 Erlang Solutions Ltd.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%==============================================================================
-module(metrics_api_SUITE).
-compile([export_all, nowarn_export_all]).

-import(distributed_helper, [mim/0, mim2/0, rpc/4]).
-import(rest_helper, [assert_status/2, make_request/1]).

-include_lib("eunit/include/eunit.hrl").

-import(domain_helper, [host_type/0, domain/0]).

%%--------------------------------------------------------------------
%% Suite configuration
%%--------------------------------------------------------------------

%% Test cases shared by the `metrics' and `all_metrics_are_global' groups.
-define(METRICS_CASES, [message_flow,
                        one_client_just_logs_in,
                        two_clients_just_log_in,
                        one_message_sent,
                        one_direct_presence_sent,
                        one_iq_sent,
                        one_message_error,
                        one_iq_error,
                        one_presence_error]).

%% Common Test callback: the suite consists of three groups.
all() ->
    [{group, Group} || Group <- [metrics, all_metrics_are_global, global]].

%% Common Test callback: group definitions.
%% `metrics' runs the shared cases plus `non_existent_metrics';
%% `all_metrics_are_global' runs the shared cases only;
%% `global' runs the three global gauge cases.
groups() ->
    GlobalCases = [session_counters, node_uptime, cluster_size],
    [{metrics, [], [non_existent_metrics | ?METRICS_CASES]},
     {all_metrics_are_global, [], ?METRICS_CASES},
     {global, [], GlobalCases}].

%% Suite setup: calls dynamic_modules:save_modules/2 for the tested host type
%% (end_per_suite/1 later hands the resulting config to restore_modules/1),
%% makes sure mod_offline is stopped, then delegates to escalus.
init_per_suite(Config) ->
    HostType = host_type(),
    ConfigWithSavedModules = dynamic_modules:save_modules(HostType, Config),
    dynamic_modules:ensure_stopped(HostType, [mod_offline]),
    escalus:init_per_suite(ConfigWithSavedModules).

%% Suite teardown: restores the modules, then lets escalus clean up.
end_per_suite(Config) ->
    dynamic_modules:restore_modules(Config),
    escalus:end_per_suite(Config).

%% Group setup: tells metrics_helper whether this is the
%% `all_metrics_are_global' group.
init_per_group(GroupName, Config) ->
    AllGlobal = (GroupName =:= all_metrics_are_global),
    metrics_helper:prepare_by_all_metrics_are_global(Config, AllGlobal).

%% Group teardown: counterpart of init_per_group/2.
end_per_group(GroupName, Config) ->
    AllGlobal = (GroupName =:= all_metrics_are_global),
    metrics_helper:finalise_by_all_metrics_are_global(Config, AllGlobal).

%% `cluster_size' is skipped unless distributed_helper:has_mnesia/1 says yes
%% for the main node; when it runs, the nodes are un-clustered first.
init_per_testcase(cluster_size, Config) ->
    init_cluster_size_case(distributed_helper:has_mnesia(mim()), Config);
init_per_testcase(CaseName, Config) ->
    escalus:init_per_testcase(CaseName, Config).

%% Dispatches on the result of the Mnesia check for the `cluster_size' case.
init_cluster_size_case(true, Config) ->
    UnclusteredConfig = ensure_nodes_not_clustered(Config),
    escalus:init_per_testcase(cluster_size, UnclusteredConfig);
init_cluster_size_case(false, _Config) ->
    {skip, "Requires Mnesia"}.

%% `cluster_size' re-clusters the nodes recorded during its setup.
end_per_testcase(cluster_size, Config) ->
    ReclusteredConfig = ensure_nodes_clustered(Config),
    escalus:end_per_testcase(cluster_size, ReclusteredConfig);
end_per_testcase(CaseName, Config) ->
    escalus:end_per_testcase(CaseName, Config).

%%--------------------------------------------------------------------
%% metrics_api tests
%%--------------------------------------------------------------------

%% Every path requested here is expected to be answered with 404.
non_existent_metrics(_Config) ->
    IncompleteName = "backends",
    GlobalMetricName = "adhoc_local_commands",
    HostType = metrics_helper:make_host_type_name(host_type()),
    Expect404 = fun(Path) -> assert_status(404, request(<<"GET">>, Path)) end,
    Expect404("/metrics/all/" ++ IncompleteName),
    Expect404("/metrics/all/badMetric"),
    Expect404("/metrics/global/" ++ IncompleteName),
    Expect404("/metrics/global/badMetric"),
    Expect404("/metrics/host_type/badHostType"),
    Expect404("/metrics/host_type/badHostType/xmppStanzaCount"),
    %% NOTE(review): the two paths below omit the "host_type/" segment that
    %% the other per-host-type requests in this suite use, so they may be
    %% rejected because the route is unknown rather than because of the
    %% metric name -- confirm this is intended.
    Expect404(["/metrics/", HostType, "/", GlobalMetricName]),
    Expect404(["/metrics/", HostType, "/badMetric"]).

%% Runs the global-only walk-through when all metrics are global,
%% the full walk-through otherwise.
message_flow(Config) ->
    AllGlobal = metrics_helper:all_metrics_are_global(Config),
    run_message_flow(AllGlobal, Config).

run_message_flow(true, Config) ->
    metrics_only_global(Config);
run_message_flow(_, Config) ->
    metrics_msg_flow(Config).

%% A single user connects and disconnects without doing anything else.
one_client_just_logs_in(Config) ->
    Users = metrics_helper:userspec(1, Config),
    Story = fun(_User1) -> end_of_story end,
    %% A list of metrics and their expected relative increase
    Expected = [{xmppIqSent, user_alpha(2)},
                {xmppIqReceived, user_alpha(2)},
                {xmppMessageSent, 0},
                {xmppMessageReceived, 0},
                {xmppPresenceSent, user_alpha(1)},
                {xmppPresenceReceived, user_alpha(1)},
                {xmppStanzaSent, user_alpha(3)},
                {xmppStanzaReceived, user_alpha(3)},
                {sessionSuccessfulLogins, user_alpha(1)},
                {sessionLogouts, user_alpha(1)}],
    instrumented_story(Config, Users, Story, Expected).

%% Two users connect and disconnect without exchanging any stanza.
two_clients_just_log_in(Config) ->
    Users = metrics_helper:userspec(1, 1, Config),
    Story = fun(_User1, _User2) -> end_of_story end,
    Expected = [{xmppIqSent, user_alpha(4)},
                {xmppIqReceived, user_alpha(4)},
                {xmppMessageSent, 0},
                {xmppMessageReceived, 0},
                {xmppPresenceSent, user_alpha(2)},
                {xmppPresenceReceived, user_alpha(2)},
                {xmppStanzaSent, user_alpha(6)},
                {xmppStanzaReceived, user_alpha(6)},
                {sessionSuccessfulLogins, user_alpha(2)},
                {sessionLogouts, user_alpha(2)}],
    instrumented_story(Config, Users, Story, Expected).

%% The first user sends one chat message and the second waits for a stanza.
one_message_sent(Config) ->
    Users = metrics_helper:userspec(1, 1, Config),
    Story = fun(Sender, Recipient) ->
                    Message = escalus_stanza:chat_to(Recipient, <<"Hi!">>),
                    escalus_client:send(Sender, Message),
                    escalus_client:wait_for_stanza(Recipient)
            end,
    Expected = [{xmppMessageSent, 1},
                {xmppMessageReceived, 1}],
    instrumented_story(Config, Users, Story, Expected).

%% The first user sends one directed presence and the second waits for a
%% stanza.
one_direct_presence_sent(Config) ->
    Userspec = metrics_helper:userspec(1, 1, Config),
    Story = fun(Sender, Recipient) ->
                    Directed = escalus_stanza:presence_direct(Recipient, <<"available">>),
                    escalus:send(Sender, Directed),
                    escalus:wait_for_stanza(Recipient)
            end,
    Expected = [{xmppPresenceSent, 1 + user_alpha(2)},
                {xmppPresenceReceived, 1 + user_alpha(2)},
                {xmppStanzaSent, 1 + user_alpha(6)},
                {xmppStanzaReceived, 1 + user_alpha(6)}],
    instrumented_story(Config, Userspec, Story, Expected).

%% A single user sends one roster-get IQ and waits for a stanza.
one_iq_sent(Config) ->
    Users = metrics_helper:userspec(1, Config),
    Story = fun(User) ->
                    escalus_client:send(User, escalus_stanza:roster_get()),
                    escalus_client:wait_for_stanza(User)
            end,
    Expected = [{xmppIqSent, 3},
                {xmppIqReceived, 3},
                {modRosterGets, 1},
                {xmppStanzaSent, 1 + user_alpha(3)},
                {xmppStanzaReceived, 1 + user_alpha(3)}],
    instrumented_story(Config, Users, Story, Expected).

%% A chat message addressed to "nobody@<domain>" is expected to bump the
%% message error counter only.
one_message_error(Config) ->
    Users = metrics_helper:userspec(1, Config),
    Story = fun(User) ->
                    Nobody = <<"nobody@", (domain())/binary>>,
                    escalus_client:send(User, escalus_stanza:chat_to(Nobody, <<"Hi!">>)),
                    escalus_client:wait_for_stanza(User)
            end,
    Expected = [{xmppErrorTotal, 1},
                {xmppErrorIq, 0},
                {xmppErrorMessage, 1},
                {xmppErrorPresence, 0}],
    instrumented_story(Config, Users, Story, Expected).

%% An IQ set in the "BadNS" namespace is expected to bump the IQ error
%% counter only.
one_iq_error(Config) ->
    Users = metrics_helper:userspec(1, Config),
    Story = fun(User) ->
                    escalus_client:send(User, escalus_stanza:iq_set(<<"BadNS">>, [])),
                    escalus_client:wait_for_stanza(User)
            end,
    Expected = [{xmppErrorTotal, 1},
                {xmppErrorIq, 1},
                {xmppErrorMessage, 0},
                {xmppErrorPresence, 0}],
    instrumented_story(Config, Users, Story, Expected).

%% A "subscribed" presence directed at "<domain>/no-such-resource" is
%% expected to bump the presence error counter only.
one_presence_error(Config) ->
    Users = metrics_helper:userspec(1, Config),
    Story = fun(User) ->
                    Target = <<(domain())/binary, "/no-such-resource">>,
                    Broken = escalus_stanza:presence_direct(Target, <<"subscribed">>, []),
                    escalus_client:send(User, Broken),
                    escalus_client:wait_for_stanza(User)
            end,
    Expected = [{xmppErrorTotal, 1},
                {xmppErrorIq, 0},
                {xmppErrorMessage, 0},
                {xmppErrorPresence, 1}],
    instrumented_story(Config, Users, Story, Expected).

%% With two alice sessions and one bob session open, the global gauges must
%% read 3 total, 2 unique and 3 node sessions.
session_counters(Config) ->
    Gauges = [totalSessionCount, uniqueSessionCount, nodeSessionCount],
    Story = fun(_Alice1, _Alice2, _Bob) ->
                    %% Force update
                    _ = [metrics_helper:sample(Gauge) || Gauge <- Gauges],
                    timer:sleep(timer:seconds(1)),

                    ?assertEqual(3, fetch_global_gauge_value(totalSessionCount, Config)),
                    ?assertEqual(2, fetch_global_gauge_value(uniqueSessionCount, Config)),
                    ?assertEqual(3, fetch_global_gauge_value(nodeSessionCount, Config))
            end,
    escalus:story(Config, [{alice, 2}, {bob, 1}], Story).

%% The nodeUpTime gauge must strictly grow between two reads taken one
%% second apart.
node_uptime(Config) ->
    X = fetch_global_incrementing_gauge_value(nodeUpTime, Config),
    timer:sleep(timer:seconds(1)),
    Y = fetch_global_incrementing_gauge_value(nodeUpTime, Config),
    ?assertEqual(true, Y > X, [{counter, nodeUpTime}, {first, X}, {second, Y}]).

%% The clusterSize gauge must read 1, then 2 after a node is added, then 1
%% again after that node is removed.
cluster_size(Config) ->
    SingleNodeClusterState =
        fetch_global_incrementing_gauge_value(clusterSize, Config),
    ?assertEqual(1, SingleNodeClusterState),

    distributed_helper:add_node_to_cluster(Config),
    TwoNodesClusterState =
        fetch_global_incrementing_gauge_value(clusterSize, Config),
    ?assertEqual(2, TwoNodesClusterState),

    distributed_helper:remove_node_from_cluster(Config),
    SingleNodeClusterState2 =
        fetch_global_incrementing_gauge_value(clusterSize, Config),
    ?assertEqual(1, SingleNodeClusterState2).

%%--------------------------------------------------------------------
%% Helpers
%%--------------------------------------------------------------------

%% Walks the metrics REST API on mim2() for the case where every metric is
%% global: the listing must contain no host-type metrics, and the global
%% endpoints must answer with single-key JSON objects.
metrics_only_global(_Config) ->
    % 0. GET is the only implemented allowed method
    % (both OPTIONS and HEAD are for free then)
    assert_only_get_allowed(request(<<"OPTIONS">>, "/metrics/", mim2())),

    % List of host types and metrics
    ListRes = request(<<"GET">>, "/metrics/", mim2()),
    assert_status(200, ListRes),
    {_, _, ListBody} = ListRes,
    #{<<"host_types">> := [_ExampleHostType | _],
      <<"metrics">> := [],
      <<"global">> := [ExampleGlobal | _]} = ListBody,

    % All global metrics
    GlobalRes = request(<<"GET">>, "/metrics/global", mim2()),
    assert_status(200, GlobalRes),
    {_, _, GlobalBody} = GlobalRes,
    #{<<"metrics">> := _} = GlobalBody,
    ?assertEqual(1, maps:size(GlobalBody)),

    % An example global metric
    OneGlobalRes = request(<<"GET">>, ["/metrics/global/", ExampleGlobal], mim2()),
    %% Check the status first so a failure reports the HTTP code rather
    %% than a badmatch on the body.
    assert_status(200, OneGlobalRes),
    {_, _, OneGlobalBody} = OneGlobalRes,
    #{<<"metric">> := _} = OneGlobalBody,
    ?assertEqual(1, maps:size(OneGlobalBody)).

%% Walks the metrics REST API on the default node: listing, sums over all
%% host types, per-host-type metrics and global metrics. Every response must
%% be a 200 carrying a single-key JSON object.
metrics_msg_flow(_Config) ->
    % 0. GET is the only implemented allowed method
    % (both OPTIONS and HEAD are for free then)
    assert_only_get_allowed(request(<<"OPTIONS">>, "/metrics/")),

    % List of host types and metrics
    ListRes = request(<<"GET">>, "/metrics/"),
    assert_status(200, ListRes),
    {_, _, ListBody} = ListRes,
    #{<<"host_types">> := [ExampleHostType | _],
      <<"metrics">> := [ExampleMetric | _],
      <<"global">> := [ExampleGlobal | _]} = ListBody,

    % Sum of all metrics
    AllRes = request(<<"GET">>, "/metrics/all"),
    assert_status(200, AllRes),
    {_, _, AllBody} = AllRes,
    #{<<"metrics">> := _} = AllBody,
    ?assertEqual(1, maps:size(AllBody)),

    % Sum for a given metric
    SumRes = request(<<"GET">>, ["/metrics/all/", ExampleMetric]),
    assert_status(200, SumRes),
    {_, _, SumBody} = SumRes,
    #{<<"metric">> := #{<<"one">> := _, <<"count">> := _} = SumMetric} = SumBody,
    ?assertEqual(2, maps:size(SumMetric)),
    ?assertEqual(1, maps:size(SumBody)),

    % All metrics for an example host type
    HostTypeRes = request(<<"GET">>, ["/metrics/host_type/", ExampleHostType]),
    assert_status(200, HostTypeRes),
    {_, _, HostTypeBody} = HostTypeRes,
    #{<<"metrics">> := _} = HostTypeBody,
    ?assertEqual(1, maps:size(HostTypeBody)),

    % An example metric for an example host type
    HostMetricRes = request(<<"GET">>, ["/metrics/host_type/", ExampleHostType,
                                        "/", ExampleMetric]),
    assert_status(200, HostMetricRes),
    {_, _, HostMetricBody} = HostMetricRes,
    #{<<"metric">> := #{<<"one">> := _, <<"count">> := _} = HostMetric} = HostMetricBody,
    ?assertEqual(2, maps:size(HostMetric)),
    ?assertEqual(1, maps:size(HostMetricBody)),

    % All global metrics
    GlobalRes = request(<<"GET">>, "/metrics/global"),
    assert_status(200, GlobalRes),
    {_, _, GlobalBody} = GlobalRes,
    #{<<"metrics">> := _} = GlobalBody,
    ?assertEqual(1, maps:size(GlobalBody)),

    % An example global metric
    OneGlobalRes = request(<<"GET">>, ["/metrics/global/", ExampleGlobal]),
    assert_status(200, OneGlobalRes),
    {_, _, OneGlobalBody} = OneGlobalRes,
    #{<<"metric">> := _} = OneGlobalBody,
    ?assertEqual(1, maps:size(OneGlobalBody)).

%% Asserts that an OPTIONS response is a 200 whose "allow" header lists
%% exactly GET, HEAD and OPTIONS (in any order).
assert_only_get_allowed({_Status, Headers, _Body} = Response) ->
    assert_status(200, Response),
    Allow = proplists:get_value(<<"allow">>, Headers),
    Methods = string:split(Allow, ", ", all),
    ?assertEqual([<<"GET">>, <<"HEAD">>, <<"OPTIONS">>], lists:sort(Methods)).

%% Identity function: returns its argument unchanged.
%% It is used in expected-counter lists to mark the part of an increase that
%% comes from the users logging in via escalus:story/3, as opposed to the
%% stanzas the story itself sends. Callers pass the number of stanzas that
%% overhead amounts to for the given metric.
user_alpha(NumberOfUsers) ->
    NumberOfUsers.

%% Runs StoryFun through escalus:story/3 and checks that every counter named
%% in CounterSpecs (a list of {Name, ExpectedIncrease}) grew by exactly the
%% expected amount between the readings taken before and after the story.
%% Returns whatever escalus:story/3 returned.
instrumented_story(Config, UsersSpecs, StoryFun, CounterSpecs) ->
    Befores = fetch_all(Config, CounterSpecs),
    StoryResult = escalus:story(Config, UsersSpecs, StoryFun),
    Afters = fetch_all(Config, CounterSpecs),
    %% Evaluated only for the assertions; the resulting list is discarded.
    _ = [assert_counter_inc(Name, N, find(Name, Befores), find(Name, Afters))
         || {Name, N} <- CounterSpecs],
    StoryResult.

%% Reads every counter named in CounterSpecs and returns a list of
%% {Counter, Readings}. Global spiral readings are used when all metrics are
%% global, host-type readings otherwise.
fetch_all(Config, CounterSpecs) ->
    FetchCounterFun = case metrics_helper:all_metrics_are_global(Config) of
                          true -> fun fetch_global_spiral_values/2;
                          _ -> fun fetch_counter_value/2
                      end,
    [{Counter, FetchCounterFun(Counter, Config)}
     || {Counter, _} <- CounterSpecs].

%% Looks CounterName up in a list of {Name, Value} pairs.
%% Raises `counter_defined_incorrectly' when the name is absent.
find(CounterName, CounterList) ->
    case lists:keyfind(CounterName, 1, CounterList) of
        {CounterName, Val} -> Val;
        false -> error(counter_defined_incorrectly)
    end.

%% Reads the "count" of a host-type metric through four endpoints and
%% returns the four readings as a list, in this order:
%%   1. /metrics/host_type/<HostType>/<Metric>
%%   2. /metrics/host_type/<HostType>  (entry for Metric in the listing)
%%   3. /metrics/all/<Metric>
%%   4. /metrics/all/                  (entry for Metric in the listing)
%% Every request must answer 200.
fetch_counter_value(Counter, _Config) ->
    Metric = atom_to_binary(Counter, utf8),

    HostType = host_type(),
    HostTypeName = metrics_helper:make_host_type_name(HostType),

    Result = request(<<"GET">>, ["/metrics/host_type/", HostTypeName, "/", Metric]),
    {_S, _H, B} = Result,
    assert_status(200, Result),
    #{<<"metric">> := #{<<"count">> := HostTypeValue}} = B,

    Result2 = request(<<"GET">>, ["/metrics/host_type/", HostTypeName]),
    {_S2, _H2, B2} = Result2,
    assert_status(200, Result2),
    #{<<"metrics">> := #{Metric := #{<<"count">> := HostTypeValueList}}} = B2,

    Result3 = request(<<"GET">>, ["/metrics/all/", Metric]),
    {_S3, _H3, B3} = Result3,
    assert_status(200, Result3),
    #{<<"metric">> := #{<<"count">> := TotalValue}} = B3,

    Result4 = request(<<"GET">>, "/metrics/all/"),
    {_S4, _H4, B4} = Result4,
    assert_status(200, Result4),
    #{<<"metrics">> := #{Metric := #{<<"count">> := TotalValueList}}} = B4,

    [HostTypeValue, HostTypeValueList, TotalValue, TotalValueList].

%% @doc Fetch counter that is static.
%%
%% Both readings (single-metric endpoint and listing) must be equal;
%% returns that value.
fetch_global_gauge_value(Counter, Config) ->
    [Value, ValueList] = fetch_global_gauge_values(Counter, Config),
    ?assertEqual(Value, ValueList, [{counter, Counter}]),
    Value.

%% @doc Fetch counter that can be incremented by server between two API requests.
%%
%% The second reading must not be smaller than the first.
%% Returns last actual value
fetch_global_incrementing_gauge_value(Counter, Config) ->
    [Value, ValueList] = fetch_global_gauge_values(Counter, Config),
    ?assertEqual(true, Value =< ValueList, [{counter, Counter},
                                            {value, Value},
                                            {value_list, ValueList}]),
    ValueList.

%% Reads the "value" key of a global metric (see fetch_global_counter_values/3).
fetch_global_gauge_values(Counter, Config) ->
    fetch_global_counter_values(<<"value">>, Counter, Config).

fetch_global_spiral_values(Counter, Config) ->
    % Spirals have two values associated with the metric: "one" and "count".
    % We are interested in the latter.
    fetch_global_counter_values(<<"count">>, Counter, Config).

%% Reads MetricKey of the global metric Counter twice: first from
%% /metrics/global/<Metric>, then from the /metrics/global/ listing.
%% Requests go to mim2() when all metrics are global, to mim() otherwise.
%% Both responses must be 200, have content-type application/json and a
%% single top-level key. Returns [Value, ValueFromListing].
fetch_global_counter_values(MetricKey, Counter, Config) ->
    Metric = atom_to_binary(Counter, utf8),

    Server = case metrics_helper:all_metrics_are_global(Config) of
                 true -> mim2();
                 _ -> mim()
             end,

    Result = request(<<"GET">>, ["/metrics/global/", Metric], Server),
    assert_status(200, Result),
    {_S, H, B} = Result,
    #{<<"metric">> := #{MetricKey := Value}} = B,
    ?assertEqual(<<"application/json">>, proplists:get_value(<<"content-type">>, H)),
    ?assertEqual(1, maps:size(B)),

    Result2 = request(<<"GET">>, ["/metrics/global/"], Server),
    assert_status(200, Result2),
    {_S2, H2, B2} = Result2,
    ?assertEqual(<<"application/json">>, proplists:get_value(<<"content-type">>, H2)),
    #{<<"metrics">> := #{Metric := #{MetricKey := ValueList}}} = B2,
    ?assertEqual(1, maps:size(B2)),

    [Value, ValueList].

%% Checks that a counter grew by exactly Inc.
%% When the "before" reading is a list, every element must have grown by Inc;
%% a mismatch raises {unexpected_values, Name, [{Expected, Actual}]}.
%% For scalar readings a mismatch fails with function_clause.
assert_counter_inc(Name, Inc, Counters1, Counters2) when is_list(Counters1) ->
    ExpectedCounters = [Counter + Inc || Counter <- Counters1],
    case ExpectedCounters == Counters2 of
        true ->
            ok;
        false ->
            ct:comment("Expected ~w, got: ~w", [ExpectedCounters, Counters2]),
            error({unexpected_values, Name, get_diffs(ExpectedCounters, Counters2)})
    end;
assert_counter_inc(_Name, Inc, Counter1, Counter2) when Counter1 + Inc =:= Counter2 ->
    ok.

%% Pairs each expected value with the actual one at the same position.
get_diffs(L1, L2) ->
    lists:zip(L1, L2).

%% Removes every other running Mnesia db node from the cluster of mim() and
%% records the removed node names under `nodes_clustered' in the returned
%% config, so they can be re-joined afterwards.
ensure_nodes_not_clustered(Config) ->
    #{node := Node1Name} = RPCNode = mim(),
    Nodes1 = rpc(RPCNode, mnesia, system_info, [running_db_nodes]),

    Nodes = [Node || Node <- Nodes1, Node =/= Node1Name],
    lists:foreach(
      fun(N) -> distributed_helper:remove_node_from_cluster(#{node => N}, Config) end,
      Nodes),
    %% Prepend so that this entry is the one proplists:get_value/2 finds,
    %% even if Config already carries a `nodes_clustered' key.
    [{nodes_clustered, Nodes} | Config].

%% Re-joins every node listed under `nodes_clustered' in Config (as recorded
%% by ensure_nodes_not_clustered/1) and returns Config unchanged.
ensure_nodes_clustered(Config) ->
    NodesToBeClustered = proplists:get_value(nodes_clustered, Config),
    %% `nodes_clustered' holds bare node names. They are wrapped in an
    %% RPC-spec map here, the same way ensure_nodes_not_clustered/1 passes
    %% them to remove_node_from_cluster/2.
    %% NOTE(review): assumes add_node_to_cluster/2 takes the same node map
    %% as remove_node_from_cluster/2 -- confirm against distributed_helper.
    _ = [distributed_helper:add_node_to_cluster(#{node => N}, Config)
         || N <- NodesToBeClustered],
    Config.

%% Sends an admin REST request with the given method and path (an iolist);
%% headers are returned and JSON is decoded to maps.
request(Method, Path) ->
    make_request(#{role => admin, method => Method, path => iolist_to_binary(Path),
                   return_headers => true, return_maps => true}).

%% Same as request/2, but addressed to the given Server.
request(Method, Path, Server) ->
    make_request(#{role => admin, method => Method, path => iolist_to_binary(Path),
                   return_headers => true, return_maps => true, server => Server}).