%%==============================================================================
%% Copyright 2020 Erlang Solutions Ltd.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%==============================================================================
-module(metrics_api_SUITE).
-compile([export_all, nowarn_export_all]).

-include_lib("common_test/include/ct.hrl").

-import(distributed_helper, [mim/0, rpc/4]).
-import(rest_helper, [assert_status/2, simple_request/2, simple_request/3, simple_request/4]).
%% Port of the metrics REST listener, read from the CT config entry
%% {hosts, mim, metrics_rest_port}.
-define(PORT, (ct:get_config({hosts, mim, metrics_rest_port}))).

-include_lib("eunit/include/eunit.hrl").

-import(domain_helper, [host_type/0, domain/0]).

%%--------------------------------------------------------------------
%% Suite configuration
%%--------------------------------------------------------------------

%% @doc Common Test entry point: every test case lives in one of three groups.
all() ->
    [{group, Group} || Group <- [metrics, all_metrics_are_global, global]].

%% Test cases shared by the `metrics' and `all_metrics_are_global' groups.
-define(METRICS_CASES, [
                        message_flow,
                        one_client_just_logs_in,
                        two_clients_just_log_in,
                        one_message_sent,
                        one_direct_presence_sent,
                        one_iq_sent,
                        one_message_error,
                        one_iq_error,
                        one_presence_error
                       ]).

%% @doc Group definitions. `metrics' and `all_metrics_are_global' run the same
%% list of cases; `global' runs the cases that only look at global metrics.
groups() ->
    GlobalCases = [session_counters,
                   node_uptime,
                   cluster_size],
    [
     {metrics, [], ?METRICS_CASES},
     {all_metrics_are_global, [], ?METRICS_CASES},
     {global, [], GlobalCases}
    ].

%% @doc Suite setup: passes the config through dynamic_modules:save_modules/2
%% (undone by restore_modules/1 in end_per_suite/1), makes sure mod_offline is
%% stopped for the host type and then delegates to escalus.
init_per_suite(Config) ->
    CurrentHostType = host_type(),
    ConfigWithSavedModules = dynamic_modules:save_modules(CurrentHostType, Config),
    dynamic_modules:ensure_stopped(CurrentHostType, [mod_offline]),
    escalus:init_per_suite(ConfigWithSavedModules).

%% @doc Suite teardown: restores the modules, then lets escalus clean up.
end_per_suite(Config) ->
    dynamic_modules:restore_modules(Config),
    escalus:end_per_suite(Config).

%% @doc Group setup; only the `all_metrics_are_global' group passes `true'
%% to the metrics helper.
init_per_group(all_metrics_are_global, Config) ->
    metrics_helper:prepare_by_all_metrics_are_global(Config, true);
init_per_group(_OtherGroup, Config) ->
    metrics_helper:prepare_by_all_metrics_are_global(Config, false).

%% @doc Group teardown, mirroring init_per_group/2.
end_per_group(all_metrics_are_global, Config) ->
    metrics_helper:finalise_by_all_metrics_are_global(Config, true);
end_per_group(_OtherGroup, Config) ->
    metrics_helper:finalise_by_all_metrics_are_global(Config, false).

%% @doc Test case setup. `cluster_size' needs the nodes unclustered first;
%% every other case goes straight to escalus.
init_per_testcase(CaseName, Config) ->
    escalus:init_per_testcase(CaseName, uncluster_if_needed(CaseName, Config)).

%% @doc Test case teardown. `cluster_size' re-clusters the nodes that were
%% unclustered during setup.
end_per_testcase(CaseName, Config) ->
    escalus:end_per_testcase(CaseName, recluster_if_needed(CaseName, Config)).

%% Config transformation applied before `cluster_size' only.
uncluster_if_needed(cluster_size, Config) ->
    ensure_nodes_not_clustered(Config);
uncluster_if_needed(_CaseName, Config) ->
    Config.

%% Config transformation applied after `cluster_size' only.
recluster_if_needed(cluster_size, Config) ->
    ensure_nodes_clustered(Config);
recluster_if_needed(_CaseName, Config) ->
    Config.

%%--------------------------------------------------------------------
%% metrics_api tests
%%--------------------------------------------------------------------

%% @doc Walks through the metrics REST API. Which walk-through is used depends
%% on whether the helper reports all metrics as global.
message_flow(Config) ->
    Scenario = case metrics_helper:all_metrics_are_global(Config) of
                   true -> fun metrics_only_global/1;
                   _ -> fun metrics_msg_flow/1
               end,
    Scenario(Config).

%% @doc One user goes through a story that does nothing; only the login
%% overhead (see user_alpha/1) is expected on the counters.
one_client_just_logs_in(Config) ->
    UserSpec = metrics_helper:userspec(1, Config),
    IdleStory = fun(_User1) -> end_of_story end,
    LoginOverhead = user_alpha(1),
    %% A list of metrics and their expected relative increase
    Expected = [{xmppIqSent, 0},
                {xmppIqReceived, 0},
                {xmppMessageSent, 0},
                {xmppMessageReceived, 0},
                {xmppPresenceSent, LoginOverhead},
                {xmppPresenceReceived, LoginOverhead},
                {xmppStanzaSent, LoginOverhead},
                {xmppStanzaReceived, LoginOverhead},
                {sessionSuccessfulLogins, LoginOverhead},
                {sessionLogouts, LoginOverhead}],
    instrumented_story(Config, UserSpec, IdleStory, Expected).

%% @doc Same as one_client_just_logs_in/1, but with two users.
two_clients_just_log_in(Config) ->
    UserSpec = metrics_helper:userspec(1, 1, Config),
    IdleStory = fun(_User1, _User2) -> end_of_story end,
    LoginOverhead = user_alpha(2),
    Expected = [{xmppMessageSent, 0},
                {xmppMessageReceived, 0},
                {xmppStanzaSent, LoginOverhead},
                {xmppStanzaReceived, LoginOverhead},
                {xmppPresenceSent, LoginOverhead},
                {xmppPresenceReceived, LoginOverhead},
                {sessionSuccessfulLogins, LoginOverhead},
                {sessionLogouts, LoginOverhead}],
    instrumented_story(Config, UserSpec, IdleStory, Expected).

%% @doc The first user sends one chat message to the second one, who waits
%% for it; both message counters must grow by one.
one_message_sent(Config) ->
    UserSpec = metrics_helper:userspec(1, 1, Config),
    Story = fun(Sender, Recipient) ->
                    Message = escalus_stanza:chat_to(Recipient, <<"Hi!">>),
                    escalus_client:send(Sender, Message),
                    escalus_client:wait_for_stanza(Recipient)
            end,
    Expected = [{xmppMessageSent, 1},
                {xmppMessageReceived, 1}],
    instrumented_story(Config, UserSpec, Story, Expected).

%% @doc The first user sends one directed presence to the second one; the
%% presence and stanza counters grow by one on top of the login overhead.
one_direct_presence_sent(Config) ->
    UserSpec = metrics_helper:userspec(1, 1, Config),
    Story = fun(Sender, Recipient) ->
                    DirectPresence = escalus_stanza:presence_direct(Recipient, <<"available">>),
                    escalus:send(Sender, DirectPresence),
                    escalus:wait_for_stanza(Recipient)
            end,
    Increase = 1 + user_alpha(2),
    Expected = [{xmppPresenceSent, Increase},
                {xmppPresenceReceived, Increase},
                {xmppStanzaSent, Increase},
                {xmppStanzaReceived, Increase}],
    instrumented_story(Config, UserSpec, Story, Expected).

%% @doc A single user sends a roster get and waits for the reply.
one_iq_sent(Config) ->
    Expected = [{xmppIqSent, 1},
                {xmppIqReceived, 1},
                {modRosterGets, 1},
                {xmppStanzaSent, 1 + user_alpha(1)},
                {xmppStanzaReceived, 1 + user_alpha(1)}],
    single_user_roundtrip(Config, fun escalus_stanza:roster_get/0, Expected).

%% @doc A chat message to `nobody@Domain'; the user waits for one stanza in
%% return and only the message error counter (and the total) must grow.
one_message_error(Config) ->
    BuildStanza = fun() ->
                          Nobody = <<"nobody@", (domain())/binary>>,
                          escalus_stanza:chat_to(Nobody, <<"Hi!">>)
                  end,
    Expected = [{xmppErrorTotal, 1},
                {xmppErrorIq, 0},
                {xmppErrorMessage, 1},
                {xmppErrorPresence, 0}],
    single_user_roundtrip(Config, BuildStanza, Expected).

%% @doc An IQ set in the `BadNS' namespace; only the IQ error counter (and
%% the total) must grow.
one_iq_error(Config) ->
    BuildStanza = fun() -> escalus_stanza:iq_set(<<"BadNS">>, []) end,
    Expected = [{xmppErrorTotal, 1},
                {xmppErrorIq, 1},
                {xmppErrorMessage, 0},
                {xmppErrorPresence, 0}],
    single_user_roundtrip(Config, BuildStanza, Expected).

%% @doc A `subscribed' presence directed at `Domain/no-such-resource'; only
%% the presence error counter (and the total) must grow.
one_presence_error(Config) ->
    BuildStanza = fun() ->
                          Target = <<(domain())/binary, "/no-such-resource">>,
                          escalus_stanza:presence_direct(Target, <<"subscribed">>, [])
                  end,
    Expected = [{xmppErrorTotal, 1},
                {xmppErrorIq, 0},
                {xmppErrorMessage, 0},
                {xmppErrorPresence, 1}],
    single_user_roundtrip(Config, BuildStanza, Expected).

%% Runs an instrumented one-user story in which the user sends the stanza
%% produced by BuildStanza (built inside the story) and then waits for one
%% incoming stanza.
single_user_roundtrip(Config, BuildStanza, Expected) ->
    Story = fun(User1) ->
                    escalus_client:send(User1, BuildStanza()),
                    escalus_client:wait_for_stanza(User1)
            end,
    instrumented_story(Config, metrics_helper:userspec(1, Config), Story, Expected).

%% @doc Runs a story with three clients (spec `[{alice, 2}, {bob, 1}]') and,
%% after forcing a sample of every session gauge, checks their values.
session_counters(Config) ->
    Names = [totalSessionCount, uniqueSessionCount, nodeSessionCount],
    escalus:story
      (Config, [{alice, 2}, {bob, 1}],
       fun(_User11, _User12, _User2) ->
               %% Force update
               lists:foreach(fun metrics_helper:sample/1, Names),
               ?assertEqual(3, fetch_global_gauge_value(totalSessionCount, Config)),
               ?assertEqual(2, fetch_global_gauge_value(uniqueSessionCount, Config)),
               ?assertEqual(3, fetch_global_gauge_value(nodeSessionCount, Config))
       end).

%% @doc Reads `nodeUpTime' twice, one second apart, and expects it to grow.
node_uptime(Config) ->
    X = fetch_global_incrementing_gauge_value(nodeUpTime, Config),
    timer:sleep(timer:seconds(1)),
    Y = fetch_global_incrementing_gauge_value(nodeUpTime, Config),
    ?assertEqual(true, Y > X, [{counter, nodeUpTime}, {first, X}, {second, Y}]).

%% @doc Checks that `clusterSize' reads 1, then 2 after a node is added to the
%% cluster, then 1 again after it is removed.
cluster_size(Config) ->
    SingleNodeClusterState =
        fetch_global_incrementing_gauge_value(clusterSize, Config),
    ?assertEqual(1, SingleNodeClusterState),

    distributed_helper:add_node_to_cluster(Config),
    TwoNodesClusterState =
        fetch_global_incrementing_gauge_value(clusterSize, Config),
    ?assertEqual(2, TwoNodesClusterState),

    distributed_helper:remove_node_from_cluster(Config),
    SingleNodeClusterState2 =
        fetch_global_incrementing_gauge_value(clusterSize, Config),
    ?assertEqual(1, SingleNodeClusterState2).

%%--------------------------------------------------------------------
%% Helpers
%%--------------------------------------------------------------------

%% @doc REST API walk-through against the `mim2' metrics port, where the
%% listing is expected to contain no per-host-type metrics at all.
metrics_only_global(_Config) ->
    Port = ct:get_config({hosts, mim2, metrics_rest_port}),
    % 0. GET is the only implemented allowed method
    % (both OPTIONS and HEAD are for free then)
    assert_only_get_allowed(Port),

    % List of host types and metrics
    #{<<"host_types">> := [_ExampleHostType | _],
      <<"metrics">> := [],
      <<"global">> := [ExampleGlobal | _]} = get_ok_body("/metrics/", Port),

    % All global metrics
    assert_sole_key(<<"metrics">>, get_ok_body("/metrics/global", Port)),

    % An example global metric
    assert_sole_key(<<"metric">>,
                    get_ok_body(["/metrics/global/", ExampleGlobal], Port)).

%% @doc REST API walk-through against the default metrics port (?PORT):
%% listing, sums over all host types, per-host-type metrics, global metrics,
%% and a 404 for each kind of non-existent resource.
metrics_msg_flow(_Config) ->
    Port = ?PORT,
    % 0. GET is the only implemented allowed method
    % (both OPTIONS and HEAD are for free then)
    assert_only_get_allowed(Port),

    % List of host types and metrics
    #{<<"host_types">> := [ExampleHostType | _],
      <<"metrics">> := [ExampleMetric | _],
      <<"global">> := [ExampleGlobal | _]} = get_ok_body("/metrics/", Port),

    % Sum of all metrics
    assert_sole_key(<<"metrics">>, get_ok_body("/metrics/all", Port)),

    % Sum for a given metric
    assert_spiral_metric(get_ok_body(["/metrics/all/", ExampleMetric], Port)),

    % Negative case for a non-existent given metric
    assert_not_found("/metrics/all/nonExistentMetric", Port),

    % All metrics for an example host type
    assert_sole_key(<<"metrics">>,
                    get_ok_body(["/metrics/host_type/", ExampleHostType], Port)),

    % Negative case for a non-existent host type
    assert_not_found("/metrics/host_type/nonExistentHostType", Port),

    % An example metric for an example host type
    assert_spiral_metric(
      get_ok_body(["/metrics/host_type/", ExampleHostType, "/", ExampleMetric], Port)),

    % Negative case for a non-existent (host type, metric) pair
    assert_not_found(["/metrics/host_type/", ExampleHostType, "/nonExistentMetric"], Port),

    % All global metrics
    assert_sole_key(<<"metrics">>, get_ok_body("/metrics/global", Port)),

    % An example global metric
    assert_sole_key(<<"metric">>,
                    get_ok_body(["/metrics/global/", ExampleGlobal], Port)).

%% Sends OPTIONS to "/metrics/" and checks that the `allow' header lists
%% exactly GET, HEAD and OPTIONS.
assert_only_get_allowed(Port) ->
    Res = simple_request(<<"OPTIONS">>, "/metrics/", Port),
    {_Status, Headers, _Body} = Res,
    assert_status(200, Res),
    Allow = proplists:get_value(<<"allow">>, Headers),
    Methods = string:split(Allow, ", ", all),
    ?assertEqual([<<"GET">>, <<"HEAD">>, <<"OPTIONS">>], lists:sort(Methods)).

%% GETs Path (chardata) from Port, asserts a 200 status and returns the body.
%% Every positive request goes through here so that none of them is left
%% without a status check.
get_ok_body(Path, Port) ->
    Res = simple_request(<<"GET">>, unicode:characters_to_list(Path), Port),
    assert_status(200, Res),
    {_Status, _Headers, Body} = Res,
    Body.

%% GETs Path (chardata) from Port and asserts a 404 status.
assert_not_found(Path, Port) ->
    Res = simple_request(<<"GET">>, unicode:characters_to_list(Path), Port),
    assert_status(404, Res).

%% Checks that Body is a map holding Key and nothing else.
assert_sole_key(Key, Body) ->
    #{Key := _} = Body,
    ?assertEqual(1, maps:size(Body)).

%% Checks that Body is `#{<<"metric">> => Spiral}' where Spiral has exactly
%% the keys `<<"one">>' and `<<"count">>'.
assert_spiral_metric(Body) ->
    #{<<"metric">> := #{<<"one">> := _, <<"count">> := _} = Spiral} = Body,
    ?assertEqual(2, maps:size(Spiral)),
    ?assertEqual(1, maps:size(Body)).

user_alpha(NumberOfUsers) ->
    %% This represents the overhead of logging in N users via escalus:story/3
    %% For each user,
    %%     xmppStanza(sent|received)
    %%     and
    %%     xmppPresence(sent|received)
    %% will be bumped by +1 at login.
    NumberOfUsers.

%% @doc Runs `escalus:story(Config, UsersSpecs, StoryFun)' and checks that each
%% counter named in CounterSpecs (a list of `{Name, ExpectedIncrease}') grew by
%% exactly the expected amount between the readings taken before and after the
%% story. Returns the story result.
instrumented_story(Config, UsersSpecs, StoryFun, CounterSpecs) ->
    Befores = fetch_all(Config, CounterSpecs),
    StoryResult = escalus:story(Config, UsersSpecs, StoryFun),
    Afters = fetch_all(Config, CounterSpecs),
    [ assert_counter_inc(Name, N, find(Name, Befores), find(Name, Afters))
      || {Name, N} <- CounterSpecs ],
    StoryResult.

%% @doc Reads every counter named in CounterSpecs and returns
%% `[{Counter, Readings}]'. The reader is the global spiral one when the helper
%% reports all metrics as global, and the per-host-type one otherwise.
fetch_all(Config, CounterSpecs) ->
    FetchCounterFun = case metrics_helper:all_metrics_are_global(Config) of
                          true -> fun fetch_global_spiral_values/2;
                          _ -> fun fetch_counter_value/2
                      end,
    [ {Counter, FetchCounterFun(Counter, Config)}
      || {Counter, _} <- CounterSpecs ].

%% @doc Looks up the readings stored for CounterName in a list of
%% `{Name, Readings}' pairs; raises `counter_defined_incorrectly' when the
%% name is not there.
find(CounterName, CounterList) ->
    case lists:keyfind(CounterName, 1, CounterList) of
        {CounterName, Val} ->
            Val;
        false ->
            error(counter_defined_incorrectly)
    end.

%% @doc Reads the `count' of a metric through four endpoints: the metric for
%% the current host type, the listing for that host type, the sum over all
%% host types, and the listing of all sums. Returns the four readings in that
%% order.
fetch_counter_value(Counter, _Config) ->
    Metric = atom_to_binary(Counter, utf8),
    HostTypeName = metrics_helper:make_host_type_name(host_type()),
    HostTypePath = ["/metrics/host_type/", HostTypeName],

    #{<<"metric">> := #{<<"count">> := HostTypeValue}} =
        fetch_ok_body([HostTypePath, "/", Metric], ?PORT),

    #{<<"metrics">> := #{Metric := #{<<"count">> := HostTypeValueList}}} =
        fetch_ok_body(HostTypePath, ?PORT),

    #{<<"metric">> := #{<<"count">> := TotalValue}} =
        fetch_ok_body(["/metrics/all/", Metric], ?PORT),

    #{<<"metrics">> := #{Metric := #{<<"count">> := TotalValueList}}} =
        fetch_ok_body("/metrics/all/", ?PORT),

    [HostTypeValue, HostTypeValueList, TotalValue, TotalValueList].

%% GETs Path (chardata) from Port, destructures the response, asserts a 200
%% status and returns the body.
fetch_ok_body(Path, Port) ->
    {_S, _H, B} = Result =
        simple_request(<<"GET">>, unicode:characters_to_list(Path), Port),
    assert_status(200, Result),
    B.

%% @doc Fetch counter that is static.
%%
%% Both readings (single metric and listing) must agree; returns that value.
fetch_global_gauge_value(Counter, Config) ->
    Readings = fetch_global_gauge_values(Counter, Config),
    [Value, ValueList] = Readings,
    ?assertEqual(Value, ValueList, [{counter, Counter}]),
    Value.

%% @doc Fetch counter that can be incremented by server between two API requests.
%%
%% The second reading may not be lower than the first one.
%% Returns last actual value
fetch_global_incrementing_gauge_value(Counter, Config) ->
    Readings = fetch_global_gauge_values(Counter, Config),
    [Value, ValueList] = Readings,
    ?assertEqual(true, Value =< ValueList, [{counter, Counter},
                                            {value, Value},
                                            {value_list, ValueList}]),
    ValueList.

%% @doc Reads the `value' field of a global metric (see
%% fetch_global_counter_values/3).
fetch_global_gauge_values(Counter, Config) ->
    fetch_global_counter_values(<<"value">>, Counter, Config).

%% @doc Reads the `count' field of a global metric.
fetch_global_spiral_values(Counter, Config) ->
    % Spirals have two values associated with the metric: "one" and "count".
    % We are interested in the latter.
    fetch_global_counter_values(<<"count">>, Counter, Config).

%% @doc Reads field MetricKey of the global metric Counter twice: first from
%% the single-metric endpoint, then from the listing of all global metrics.
%% Both responses must be 200, JSON, and have a one-key body. Returns
%% `[Value, ValueList]' in request order.
fetch_global_counter_values(MetricKey, Counter, Config) ->
    Metric = atom_to_binary(Counter, utf8),

    %% Requests go to mim2 when all metrics are global, to mim otherwise.
    Host = case metrics_helper:all_metrics_are_global(Config) of
               true -> mim2;
               _ -> mim
           end,
    Port = ct:get_config({hosts, Host, metrics_rest_port}),

    MetricPath = unicode:characters_to_list(["/metrics/global/", Metric]),
    Result = simple_request(<<"GET">>, MetricPath, Port),
    assert_status(200, Result),
    {_S, H, B} = Result,
    #{<<"metric">> := #{MetricKey := Value}} = B,
    ?assertEqual(<<"application/json">>, proplists:get_value(<<"content-type">>, H)),
    ?assertEqual(1, maps:size(B)),

    ListingPath = unicode:characters_to_list(["/metrics/global/"]),
    Result2 = simple_request(<<"GET">>, ListingPath, Port),
    assert_status(200, Result2),
    {_S2, H2, B2} = Result2,
    ?assertEqual(<<"application/json">>, proplists:get_value(<<"content-type">>, H2)),
    #{<<"metrics">> := #{Metric := #{MetricKey := ValueList}}} = B2,
    ?assertEqual(1, maps:size(B2)),

    [Value, ValueList].

%% @doc Checks that a counter grew by exactly Inc.
%%
%% When the "before" reading is a list, every element plus Inc must equal the
%% corresponding "after" element; otherwise a CT comment is set and
%% `{unexpected_values, Name, Pairs}' is raised. For a non-list reading the
%% only clause requires `Before + Inc =:= After'.
assert_counter_inc(Name, Inc, Counters1, Counters2) when is_list(Counters1) ->
    Expected = [Before + Inc || Before <- Counters1],
    if
        Expected == Counters2 ->
            ok;
        true ->
            ct:comment("Expected ~w, got: ~w", [Expected, Counters2]),
            error({unexpected_values, Name, get_diffs(Expected, Counters2)})
    end;
assert_counter_inc(_Name, Inc, Counter1, Counter2) when Counter1 + Inc =:= Counter2 ->
    ok.

%% @doc Pairs up expected and actual values position by position.
get_diffs(L1, L2) ->
    lists:zipwith(fun(Expected, Actual) -> {Expected, Actual} end, L1, L2).

%% @doc Removes from the cluster every running Mnesia DB node other than the
%% `mim' node, and appends the list of removed nodes to Config under
%% `nodes_clustered'.
ensure_nodes_not_clustered(Config) ->
    #{node := OwnNode} = MimSpec = mim(),
    RunningNodes = rpc(MimSpec, mnesia, system_info, [running_db_nodes]),

    OtherNodes = [Node || Node <- RunningNodes, Node =/= OwnNode],
    lists:foreach(fun(Node) ->
                          distributed_helper:remove_node_from_cluster(#{node => Node}, Config)
                  end,
                  OtherNodes),
    Config ++ [{nodes_clustered, OtherNodes}].

%% @doc Adds back to the cluster every node listed under `nodes_clustered' in
%% Config. Returns Config unchanged.
ensure_nodes_clustered(Config) ->
    Removed = proplists:get_value(nodes_clustered, Config),
    [distributed_helper:add_node_to_cluster(Node, Config) || Node <- Removed],
    Config.