%%==============================================================================
%% Copyright 2020 Erlang Solutions Ltd.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%==============================================================================
-module(metrics_api_SUITE).
-compile([export_all, nowarn_export_all]).

-include_lib("common_test/include/ct.hrl").

-import(distributed_helper, [mim/0, rpc/4]).
-import(rest_helper, [assert_status/2, simple_request/2, simple_request/3, simple_request/4]).
-define(PORT, (ct:get_config({hosts, mim, metrics_rest_port}))).

-include_lib("eunit/include/eunit.hrl").

-import(domain_helper, [host_type/0, domain/0]).

%%--------------------------------------------------------------------
%% Suite configuration
%%--------------------------------------------------------------------

%% @doc Common Test entry point: every test case lives in one of three groups.
all() ->
    [{group, Group} || Group <- [metrics, all_metrics_are_global, global]].

%% Test cases shared by the `metrics' and `all_metrics_are_global' groups.
-define(METRICS_CASES, [
                        message_flow,
                        one_client_just_logs_in,
                        two_clients_just_log_in,
                        one_message_sent,
                        one_direct_presence_sent,
                        one_iq_sent,
                        one_message_error,
                        one_iq_error,
                        one_presence_error
                       ]).

%% @doc Group definitions: the same metric cases are listed for two groups,
%% plus a `global' group with its own cases.
groups() ->
    GlobalCases = [session_counters, node_uptime, cluster_size],
    [{metrics, [], ?METRICS_CASES},
     {all_metrics_are_global, [], ?METRICS_CASES},
     {global, [], GlobalCases}].
%% @doc Suite setup: passes the config through dynamic_modules:save_modules/2,
%% asks for mod_offline to be stopped on the tested host type, and then
%% delegates to escalus.
init_per_suite(Config) ->
    TestedHostType = host_type(),
    SavedConfig = dynamic_modules:save_modules(TestedHostType, Config),
    dynamic_modules:ensure_stopped(TestedHostType, [mod_offline]),
    escalus:init_per_suite(SavedConfig).

%% @doc Suite teardown: restores modules first, then lets escalus clean up.
end_per_suite(Config) ->
    dynamic_modules:restore_modules(Config),
    escalus:end_per_suite(Config).

%% @doc Group setup; the boolean tells metrics_helper whether this is the
%% `all_metrics_are_global' group.
init_per_group(GroupName, Config) ->
    IsAllGlobalGroup = (GroupName =:= all_metrics_are_global),
    metrics_helper:prepare_by_all_metrics_are_global(Config, IsAllGlobalGroup).

%% @doc Group teardown, mirroring init_per_group/2.
end_per_group(GroupName, Config) ->
    IsAllGlobalGroup = (GroupName =:= all_metrics_are_global),
    metrics_helper:finalise_by_all_metrics_are_global(Config, IsAllGlobalGroup).

%% @doc Per-case setup. `cluster_size' additionally goes through
%% ensure_nodes_not_clustered/1 before the escalus setup runs.
init_per_testcase(cluster_size = CaseName, Config) ->
    escalus:init_per_testcase(CaseName, ensure_nodes_not_clustered(Config));
init_per_testcase(CaseName, Config) ->
    escalus:init_per_testcase(CaseName, Config).

%% @doc Per-case teardown. `cluster_size' goes through
%% ensure_nodes_clustered/1 before the escalus teardown runs.
end_per_testcase(cluster_size = CaseName, Config) ->
    escalus:end_per_testcase(CaseName, ensure_nodes_clustered(Config));
end_per_testcase(CaseName, Config) ->
    escalus:end_per_testcase(CaseName, Config).

%%--------------------------------------------------------------------
%% metrics_api tests
%%--------------------------------------------------------------------

%% @doc Walks through the REST API; which walk-through is used depends on
%% whether metrics_helper reports that all metrics are global.
message_flow(Config) ->
    AllGlobal = metrics_helper:all_metrics_are_global(Config),
    case AllGlobal of
        true -> metrics_only_global(Config);
        _ -> metrics_msg_flow(Config)
    end.
%% @doc One user runs an empty story; only the login overhead may show up.
one_client_just_logs_in(Config) ->
    Users = metrics_helper:userspec(1, Config),
    Story = fun(_User1) -> end_of_story end,
    LoginOverhead = user_alpha(1),
    %% A list of metrics and their expected relative increase
    Expected = [{xmppIqSent, 0},
                {xmppIqReceived, 0},
                {xmppMessageSent, 0},
                {xmppMessageReceived, 0},
                {xmppPresenceSent, LoginOverhead},
                {xmppPresenceReceived, LoginOverhead},
                {xmppStanzaSent, LoginOverhead},
                {xmppStanzaReceived, LoginOverhead},
                {sessionSuccessfulLogins, LoginOverhead},
                {sessionLogouts, LoginOverhead}],
    instrumented_story(Config, Users, Story, Expected).

%% @doc Two users run an empty story; only the login overhead may show up.
two_clients_just_log_in(Config) ->
    Users = metrics_helper:userspec(1, 1, Config),
    Story = fun(_User1, _User2) -> end_of_story end,
    LoginOverhead = user_alpha(2),
    Expected = [{xmppMessageSent, 0},
                {xmppMessageReceived, 0},
                {xmppStanzaSent, LoginOverhead},
                {xmppStanzaReceived, LoginOverhead},
                {xmppPresenceSent, LoginOverhead},
                {xmppPresenceReceived, LoginOverhead},
                {sessionSuccessfulLogins, LoginOverhead},
                {sessionLogouts, LoginOverhead}],
    instrumented_story(Config, Users, Story, Expected).

%% @doc The first user sends one chat message and the second waits for a stanza.
one_message_sent(Config) ->
    Users = metrics_helper:userspec(1, 1, Config),
    Story = fun(Sender, Receiver) ->
                    Chat = escalus_stanza:chat_to(Receiver, <<"Hi!">>),
                    escalus_client:send(Sender, Chat),
                    escalus_client:wait_for_stanza(Receiver)
            end,
    Expected = [{xmppMessageSent, 1},
                {xmppMessageReceived, 1}],
    instrumented_story(Config, Users, Story, Expected).

%% @doc The first user sends one direct presence and the second waits for a stanza.
one_direct_presence_sent(Config) ->
    Userspec = metrics_helper:userspec(1, 1, Config),
    Story = fun(Sender, Receiver) ->
                    Presence = escalus_stanza:presence_direct(Receiver, <<"available">>),
                    escalus:send(Sender, Presence),
                    escalus:wait_for_stanza(Receiver)
            end,
    %% One explicit presence on top of the login overhead of two users.
    Increase = 1 + user_alpha(2),
    Expected = [{xmppPresenceSent, Increase},
                {xmppPresenceReceived, Increase},
                {xmppStanzaSent, Increase},
                {xmppStanzaReceived, Increase}],
    instrumented_story(Config, Userspec, Story, Expected).
%% @doc One user sends a roster-get IQ and waits for a stanza back.
one_iq_sent(Config) ->
    Users = metrics_helper:userspec(1, Config),
    Story = fun(User) ->
                    RosterIq = escalus_stanza:roster_get(),
                    escalus_client:send(User, RosterIq),
                    escalus_client:wait_for_stanza(User)
            end,
    %% One IQ on top of the login overhead of a single user.
    StanzaIncrease = 1 + user_alpha(1),
    Expected = [{xmppIqSent, 1},
                {xmppIqReceived, 1},
                {modRosterGets, 1},
                {xmppStanzaSent, StanzaIncrease},
                {xmppStanzaReceived, StanzaIncrease}],
    instrumented_story(Config, Users, Story, Expected).

%% @doc One user messages "nobody@" the test domain; only the message error
%% counters are expected to move.
one_message_error(Config) ->
    Users = metrics_helper:userspec(1, Config),
    Story = fun(User) ->
                    Recipient = <<"nobody@", (domain())/binary>>,
                    Chat = escalus_stanza:chat_to(Recipient, <<"Hi!">>),
                    escalus_client:send(User, Chat),
                    escalus_client:wait_for_stanza(User)
            end,
    Expected = [{xmppErrorTotal, 1},
                {xmppErrorIq, 0},
                {xmppErrorMessage, 1},
                {xmppErrorPresence, 0}],
    instrumented_story(Config, Users, Story, Expected).

%% @doc One user sends an IQ set with the "BadNS" namespace; only the IQ
%% error counters are expected to move.
one_iq_error(Config) ->
    Users = metrics_helper:userspec(1, Config),
    Story = fun(User) ->
                    BadIQ = escalus_stanza:iq_set(<<"BadNS">>, []),
                    escalus_client:send(User, BadIQ),
                    escalus_client:wait_for_stanza(User)
            end,
    Expected = [{xmppErrorTotal, 1},
                {xmppErrorIq, 1},
                {xmppErrorMessage, 0},
                {xmppErrorPresence, 0}],
    instrumented_story(Config, Users, Story, Expected).

%% @doc One user sends a "subscribed" presence to "<domain>/no-such-resource";
%% only the presence error counters are expected to move.
one_presence_error(Config) ->
    Users = metrics_helper:userspec(1, Config),
    Story = fun(User) ->
                    Recipient = <<(domain())/binary, "/no-such-resource">>,
                    BadPres = escalus_stanza:presence_direct(Recipient, <<"subscribed">>, []),
                    escalus_client:send(User, BadPres),
                    escalus_client:wait_for_stanza(User)
            end,
    Expected = [{xmppErrorTotal, 1},
                {xmppErrorIq, 0},
                {xmppErrorMessage, 0},
                {xmppErrorPresence, 1}],
    instrumented_story(Config, Users, Story, Expected).
%% @doc With two alice sessions and one bob session open, checks the three
%% global session gauges.
session_counters(Config) ->
    escalus:story
      (Config, [{alice, 2}, {bob, 1}],
       fun(_User11, _User12, _User2) ->
               ?assertEqual(3, fetch_global_gauge_value(totalSessionCount, Config)),
               ?assertEqual(2, fetch_global_gauge_value(uniqueSessionCount, Config)),
               ?assertEqual(3, fetch_global_gauge_value(nodeSessionCount, Config))
       end).

%% @doc Reads nodeUpTime twice, one second apart, and expects it to have grown.
node_uptime(Config) ->
    X = fetch_global_incrementing_gauge_value(nodeUpTime, Config),
    timer:sleep(timer:seconds(1)),
    Y = fetch_global_incrementing_gauge_value(nodeUpTime, Config),
    ?assertEqual(true, Y > X, [{counter, nodeUpTime}, {first, X}, {second, Y}]).

%% @doc Expects clusterSize to read 1, then 2 after a node is added to the
%% cluster, then 1 again after it is removed.
cluster_size(Config) ->
    SingleNodeClusterState =
        fetch_global_incrementing_gauge_value(clusterSize, Config),
    ?assertEqual(1, SingleNodeClusterState),

    distributed_helper:add_node_to_cluster(Config),
    TwoNodesClusterState =
        fetch_global_incrementing_gauge_value(clusterSize, Config),
    ?assertEqual(2, TwoNodesClusterState),

    distributed_helper:remove_node_from_cluster(Config),
    SingleNodeClusterState2 =
        fetch_global_incrementing_gauge_value(clusterSize, Config),
    ?assertEqual(1, SingleNodeClusterState2).

%%--------------------------------------------------------------------
%% Helpers
%%--------------------------------------------------------------------

%% @doc REST API walk-through used when all metrics are global: the listing
%% must contain no per-host-type metrics and at least one global metric.
metrics_only_global(_Config) ->
    Port = ct:get_config({hosts, mim2, metrics_rest_port}),
    assert_only_read_methods_allowed(Port),

    % List of host types and metrics
    #{<<"host_types">> := [_ExampleHostType | _],
      <<"metrics">> := [],
      <<"global">> := [ExampleGlobal | _]} = get_ok_body("/metrics/", Port),

    % All global metrics
    AllGlobal = get_ok_body("/metrics/global", Port),
    #{<<"metrics">> := _} = AllGlobal,
    ?assertEqual(1, maps:size(AllGlobal)),

    % An example global metric
    OneGlobal = get_ok_body(["/metrics/global/", ExampleGlobal], Port),
    #{<<"metric">> := _} = OneGlobal,
    ?assertEqual(1, maps:size(OneGlobal)).

%% @doc REST API walk-through for per-host-type metrics: listing, sums,
%% per-host-type values, global values and the 404 cases.
metrics_msg_flow(_Config) ->
    Port = ?PORT,
    assert_only_read_methods_allowed(Port),

    % List of host types and metrics
    #{<<"host_types">> := [ExampleHostType | _],
      <<"metrics">> := [ExampleMetric | _],
      <<"global">> := [ExampleGlobal | _]} = get_ok_body("/metrics/", Port),

    % Sum of all metrics
    AllSums = get_ok_body("/metrics/all", Port),
    #{<<"metrics">> := _} = AllSums,
    ?assertEqual(1, maps:size(AllSums)),

    % Sum for a given metric
    assert_one_and_count_metric(
      get_ok_body(["/metrics/all/", ExampleMetric], Port)),

    % Negative case for a non-existent given metric
    assert_not_found("/metrics/all/nonExistentMetric", Port),

    % All metrics for an example host type
    HostTypeMetrics = get_ok_body(["/metrics/host_type/", ExampleHostType], Port),
    #{<<"metrics">> := _} = HostTypeMetrics,
    ?assertEqual(1, maps:size(HostTypeMetrics)),

    % Negative case for a non-existent host type
    assert_not_found("/metrics/host_type/nonExistentHostType", Port),

    % An example metric for an example host type
    assert_one_and_count_metric(
      get_ok_body(["/metrics/host_type/", ExampleHostType, "/", ExampleMetric], Port)),

    % Negative case for a non-existent (host type, metric) pair
    assert_not_found(["/metrics/host_type/", ExampleHostType, "/nonExistentMetric"], Port),

    % All global metrics
    AllGlobal = get_ok_body("/metrics/global", Port),
    #{<<"metrics">> := _} = AllGlobal,
    ?assertEqual(1, maps:size(AllGlobal)),

    % An example global metric
    OneGlobal = get_ok_body(["/metrics/global/", ExampleGlobal], Port),
    #{<<"metric">> := _} = OneGlobal,
    ?assertEqual(1, maps:size(OneGlobal)).

%% GET is the only implemented allowed method
%% (both OPTIONS and HEAD are for free then)
assert_only_read_methods_allowed(Port) ->
    Res = simple_request(<<"OPTIONS">>, "/metrics/", Port),
    assert_status(200, Res),
    {_Status, Headers, _Body} = Res,
    Allow = proplists:get_value(<<"allow">>, Headers),
    Methods = string:split(Allow, ", ", all),
    ?assertEqual([<<"GET">>, <<"HEAD">>, <<"OPTIONS">>], lists:sort(Methods)).

%% Performs a GET on Path (chardata), requires a 200 status and returns the
%% response body. Checking the status on every success path gives a readable
%% failure instead of a badmatch on an unexpected body.
get_ok_body(Path, Port) ->
    Res = simple_request(<<"GET">>, unicode:characters_to_list(Path), Port),
    assert_status(200, Res),
    {_Status, _Headers, Body} = Res,
    Body.

%% Performs a GET on Path (chardata) and requires a 404 status.
assert_not_found(Path, Port) ->
    Res = simple_request(<<"GET">>, unicode:characters_to_list(Path), Port),
    assert_status(404, Res).

%% Requires Body to hold exactly one "metric" entry whose value has exactly
%% the "one" and "count" keys.
assert_one_and_count_metric(Body) ->
    #{<<"metric">> := #{<<"one">> := _, <<"count">> := _} = Inner} = Body,
    ?assertEqual(2, maps:size(Inner)),
    ?assertEqual(1, maps:size(Body)).

user_alpha(NumberOfUsers) ->
    %% This represents the overhead of logging in N users via escalus:story/3
    %% For each user,
    %%     xmppStanza(sent|received)
    %%     and
    %%     xmppPresence(sent|received)
    %% will be bumped by +1 at login.
    NumberOfUsers.

%% @doc Runs an escalus story and asserts that every counter named in
%% CounterSpecs ({Name, ExpectedIncrease} pairs) grew by exactly the expected
%% amount between the readings taken before and after the story.
%% Returns the story's result.
instrumented_story(Config, UsersSpecs, StoryFun, CounterSpecs) ->
    Befores = fetch_all(Config, CounterSpecs),
    StoryResult = escalus:story(Config, UsersSpecs, StoryFun),
    Afters = fetch_all(Config, CounterSpecs),
    %% Run purely for the assertions, hence foreach rather than a comprehension.
    lists:foreach(
      fun({Name, N}) ->
              assert_counter_inc(Name, N, find(Name, Befores), find(Name, Afters))
      end,
      CounterSpecs),
    StoryResult.

%% @doc Reads the current values of every counter named in CounterSpecs and
%% returns them as {Counter, Values} pairs. The fetch function is chosen by
%% whether metrics_helper reports that all metrics are global.
fetch_all(Config, CounterSpecs) ->
    FetchCounterFun = case metrics_helper:all_metrics_are_global(Config) of
                          true -> fun fetch_global_spiral_values/2;
                          _ -> fun fetch_counter_value/2
                      end,
    [ {Counter, FetchCounterFun(Counter, Config)}
      || {Counter, _} <- CounterSpecs ].
%% @doc Returns the value stored under CounterName in a list of
%% {Name, Value} pairs; raises `counter_defined_incorrectly' when absent.
find(CounterName, CounterList) ->
    Found = lists:keyfind(CounterName, 1, CounterList),
    case Found of
        {CounterName, Val} -> Val;
        false -> error(counter_defined_incorrectly)
    end.

%% @doc Reads the "count" of one metric through four endpoints: the host-type
%% metric, the host-type listing, the all-host-types sum, and the sum listing.
%% Returns the four readings in that order.
fetch_counter_value(Counter, _Config) ->
    Metric = atom_to_binary(Counter, utf8),

    HostTypeName = metrics_helper:make_host_type_name(host_type()),
    HostTypePath = ["/metrics/host_type/", HostTypeName],

    #{<<"metric">> := #{<<"count">> := HostTypeValue}} =
        fetch_ok_body([HostTypePath, "/", Metric]),

    #{<<"metrics">> := #{Metric := #{<<"count">> := HostTypeValueList}}} =
        fetch_ok_body(HostTypePath),

    #{<<"metric">> := #{<<"count">> := TotalValue}} =
        fetch_ok_body(["/metrics/all/", Metric]),

    #{<<"metrics">> := #{Metric := #{<<"count">> := TotalValueList}}} =
        fetch_ok_body("/metrics/all/"),

    [HostTypeValue, HostTypeValueList, TotalValue, TotalValueList].

%% Performs a GET on Path (chardata) against ?PORT, requires a 200 status and
%% returns the response body.
fetch_ok_body(Path) ->
    Response = simple_request(<<"GET">>, unicode:characters_to_list(Path), ?PORT),
    {_Status, _Headers, Body} = Response,
    assert_status(200, Response),
    Body.

%% @doc Fetch counter that is static.
fetch_global_gauge_value(Counter, Config) ->
    [Single, Listed] = fetch_global_gauge_values(Counter, Config),
    ?assertEqual(Single, Listed, [{counter, Counter}]),
    Single.

%% @doc Fetch counter that can be incremented by server between two API requests.
%%
%% Returns last actual value
fetch_global_incrementing_gauge_value(Counter, Config) ->
    [Earlier, Later] = fetch_global_gauge_values(Counter, Config),
    ?assertEqual(true, Earlier =< Later, [{counter, Counter},
                                          {value, Earlier},
                                          {value_list, Later}]),
    Later.

%% @doc Reads a global metric's "value" through both global endpoints.
fetch_global_gauge_values(Counter, Config) ->
    fetch_global_counter_values(<<"value">>, Counter, Config).

%% @doc Reads a global metric's "count" through both global endpoints.
fetch_global_spiral_values(Counter, Config) ->
    % Spirals have two values associated with the metric: "one" and "count".
    % We are interested in the latter.
    fetch_global_counter_values(<<"count">>, Counter, Config).

%% @doc Reads MetricKey of a global metric twice: first from the single-metric
%% endpoint, then from the global listing. Both responses must be 200,
%% "application/json" and contain exactly one top-level key.
%% Returns [SingleEndpointValue, ListingValue].
fetch_global_counter_values(MetricKey, Counter, Config) ->
    Metric = atom_to_binary(Counter, utf8),

    Host = case metrics_helper:all_metrics_are_global(Config) of
               true -> mim2;
               _ -> mim
           end,
    Port = ct:get_config({hosts, Host, metrics_rest_port}),

    SinglePath = unicode:characters_to_list(["/metrics/global/", Metric]),
    SingleRes = simple_request(<<"GET">>, SinglePath, Port),
    assert_status(200, SingleRes),
    {_, SingleHeaders, SingleBody} = SingleRes,
    #{<<"metric">> := #{MetricKey := Value}} = SingleBody,
    ?assertEqual(<<"application/json">>,
                 proplists:get_value(<<"content-type">>, SingleHeaders)),
    ?assertEqual(1, maps:size(SingleBody)),

    ListRes = simple_request(<<"GET">>, "/metrics/global/", Port),
    assert_status(200, ListRes),
    {_, ListHeaders, ListBody} = ListRes,
    ?assertEqual(<<"application/json">>,
                 proplists:get_value(<<"content-type">>, ListHeaders)),
    #{<<"metrics">> := #{Metric := #{MetricKey := ValueList}}} = ListBody,
    ?assertEqual(1, maps:size(ListBody)),

    [Value, ValueList].
%% @doc Asserts that a counter grew by exactly Inc between two readings.
%%
%% When the first reading is a list, every element must have grown by Inc;
%% on mismatch a CT comment is written and the call fails with
%% `{unexpected_values, Name, Diffs}'. For a single number the check is done
%% in the guard, so a mismatch fails with `function_clause'.
assert_counter_inc(Name, Inc, Counters1, Counters2) when is_list(Counters1) ->
    ExpectedCounters = [Counter + Inc || Counter <- Counters1],
    case ExpectedCounters == Counters2 of
        true ->
            ok;
        false ->
            ct:comment("Expected ~w, got: ~w", [ExpectedCounters, Counters2]),
            error({unexpected_values, Name, get_diffs(ExpectedCounters, Counters2)})
    end;
assert_counter_inc(_Name, Inc, Counter1, Counter2) when Counter1 + Inc =:= Counter2 ->
    ok.

%% @doc Pairs each expected value with the value actually read.
get_diffs(L1, L2) ->
    lists:zip(L1, L2).

%% @doc Removes every other running Mnesia db node from the cluster and
%% records the removed nodes in Config under `nodes_clustered'.
ensure_nodes_not_clustered(Config) ->
    #{node := Node1Name} = RPCNode = distributed_helper:mim(),
    Running = distributed_helper:rpc(RPCNode, mnesia, system_info, [running_db_nodes]),

    Nodes = [Node || Node <- Running, Node =/= Node1Name],
    lists:foreach(
      fun(N) -> distributed_helper:remove_node_from_cluster(#{node => N}, Config) end,
      Nodes),
    %% Prepended so that this entry is the one proplists:get_value/2 finds,
    %% even if Config already carries an older `nodes_clustered'.
    [{nodes_clustered, Nodes} | Config].

%% @doc Re-adds the nodes recorded by ensure_nodes_not_clustered/1.
ensure_nodes_clustered(Config) ->
    NodesToBeClustered = proplists:get_value(nodes_clustered, Config),
    %% The node is wrapped in the same `#{node => N}' spec that
    %% remove_node_from_cluster/2 is given above; a bare atom was passed before.
    %% NOTE(review): assumes add_node_to_cluster/2 takes the same node-spec
    %% map - confirm against distributed_helper.
    lists:foreach(
      fun(N) -> distributed_helper:add_node_to_cluster(#{node => N}, Config) end,
      NodesToBeClustered),
    Config.