1: %%==============================================================================
    2: %% Copyright 2020 Erlang Solutions Ltd.
    3: %%
    4: %% Licensed under the Apache License, Version 2.0 (the "License");
    5: %% you may not use this file except in compliance with the License.
    6: %% You may obtain a copy of the License at
    7: %%
    8: %% http://www.apache.org/licenses/LICENSE-2.0
    9: %%
   10: %% Unless required by applicable law or agreed to in writing, software
   11: %% distributed under the License is distributed on an "AS IS" BASIS,
   12: %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   13: %% See the License for the specific language governing permissions and
   14: %% limitations under the License.
   15: %%==============================================================================
   16: -module(metrics_api_SUITE).
   17: -compile([export_all, nowarn_export_all]).
   18: 
   19: -include_lib("common_test/include/ct.hrl").
   20: 
   21: -import(distributed_helper, [mim/0, rpc/4]).
   22: -import(rest_helper, [assert_status/2, simple_request/2, simple_request/3, simple_request/4]).
   23: -define(PORT, (ct:get_config({hosts, mim, metrics_rest_port}))).
   24: 
   25: -include_lib("eunit/include/eunit.hrl").
   26: 
   27: -import(domain_helper, [host_type/0, domain/0]).
   28: 
   29: %%--------------------------------------------------------------------
   30: %% Suite configuration
   31: %%--------------------------------------------------------------------
   32: all() ->
   33:     [
   34:      {group, metrics},
   35:      {group, all_metrics_are_global},
   36:      {group, global}
   37:     ].
   38: 
   39: -define(METRICS_CASES, [
   40:                         message_flow,
   41:                         one_client_just_logs_in,
   42:                         two_clients_just_log_in,
   43:                         one_message_sent,
   44:                         one_direct_presence_sent,
   45:                         one_iq_sent,
   46:                         one_message_error,
   47:                         one_iq_error,
   48:                         one_presence_error
   49:                        ]).
   50: 
   51: groups() ->
   52:     [
   53:      {metrics, [], ?METRICS_CASES},
   54:      {all_metrics_are_global, [], ?METRICS_CASES},
   55:      {global, [], [session_counters,
   56:                    node_uptime,
   57:                    cluster_size
   58:                   ]}
   59:     ].
   60: 
   61: init_per_suite(Config) ->
   62:     HostType = host_type(),
   63:     Config1 = dynamic_modules:save_modules(HostType, Config),
   64:     dynamic_modules:ensure_stopped(HostType, [mod_offline]),
   65:     escalus:init_per_suite(Config1).
   66: 
   67: end_per_suite(Config) ->
   68:     dynamic_modules:restore_modules(Config),
   69:     escalus:end_per_suite(Config).
   70: 
   71: init_per_group(GroupName, Config) ->
   72:     metrics_helper:prepare_by_all_metrics_are_global(Config, GroupName =:= all_metrics_are_global).
   73: 
   74: end_per_group(GroupName, Config) ->
   75:     metrics_helper:finalise_by_all_metrics_are_global(Config, GroupName =:= all_metrics_are_global).
   76: 
   77: init_per_testcase(cluster_size = CN, Config) ->
   78:     Config1 = ensure_nodes_not_clustered(Config),
   79:     escalus:init_per_testcase(CN, Config1);
   80: init_per_testcase(CaseName, Config) ->
   81:     escalus:init_per_testcase(CaseName, Config).
   82: 
   83: end_per_testcase(cluster_size = CN, Config) ->
   84:     Config1 = ensure_nodes_clustered(Config),
   85:     escalus:end_per_testcase(CN, Config1);
   86: end_per_testcase(CaseName, Config) ->
   87:     escalus:end_per_testcase(CaseName, Config).
   88: 
   89: %%--------------------------------------------------------------------
   90: %% metrics_api tests
   91: %%--------------------------------------------------------------------
   92: 
   93: message_flow(Config) ->
   94:     case metrics_helper:all_metrics_are_global(Config) of
   95:         true -> metrics_only_global(Config);
   96:         _ -> metrics_msg_flow(Config)
   97:     end.
   98: 
   99: one_client_just_logs_in(Config) ->
  100:     instrumented_story
  101:         (Config, metrics_helper:userspec(1, Config),
  102:          fun(_User1) -> end_of_story end,
  103:          %% A list of metrics and their expected relative increase
  104:          [{xmppIqSent, 0},
  105:           {xmppIqReceived, 0},
  106:           {xmppMessageSent, 0},
  107:           {xmppMessageReceived, 0},
  108:           {xmppPresenceSent, 0 + user_alpha(1)},
  109:           {xmppPresenceReceived, 0 + user_alpha(1)},
  110:           {xmppStanzaSent, 0 + user_alpha(1)},
  111:           {xmppStanzaReceived, 0 + user_alpha(1)},
  112:           {sessionSuccessfulLogins, 0 + user_alpha(1)},
  113:           {sessionLogouts, 0 + user_alpha(1)}
  114:          ]).
  115: 
  116: two_clients_just_log_in(Config) ->
  117:     instrumented_story
  118:         (Config, metrics_helper:userspec(1, 1, Config),
  119:          fun(_User1, _User2) -> end_of_story end,
  120:          [{xmppMessageSent, 0},
  121:           {xmppMessageReceived, 0},
  122:           {xmppStanzaSent, 0 + user_alpha(2)},
  123:           {xmppStanzaReceived, 0 + user_alpha(2)},
  124:           {xmppPresenceSent, 0 + user_alpha(2)},
  125:           {xmppPresenceReceived, 0 + user_alpha(2)},
  126:           {sessionSuccessfulLogins, 0 + user_alpha(2)},
  127:           {sessionLogouts, 0 + user_alpha(2)}
  128:          ]).
  129: 
  130: one_message_sent(Config) ->
  131:     instrumented_story
  132:       (Config, metrics_helper:userspec(1, 1, Config),
  133:        fun(User1, User2) ->
  134:                Chat = escalus_stanza:chat_to(User2, <<"Hi!">>),
  135:                escalus_client:send(User1, Chat),
  136:                escalus_client:wait_for_stanza(User2)
  137:        end,
  138:        [{xmppMessageSent,     1},
  139:         {xmppMessageReceived, 1}]).
  140: 
  141: one_direct_presence_sent(Config) ->
  142:     Userspec = metrics_helper:userspec(1, 1, Config),
  143:     instrumented_story
  144:       (Config, Userspec,
  145:        fun(User1, User2) ->
  146:                Presence = escalus_stanza:presence_direct(User2, <<"available">>),
  147:                escalus:send(User1, Presence),
  148:                escalus:wait_for_stanza(User2)
  149:         end,
  150:        [{xmppPresenceSent, 1 + user_alpha(2)},
  151:         {xmppPresenceReceived, 1 + user_alpha(2)},
  152:         {xmppStanzaSent, 1 + user_alpha(2)},
  153:         {xmppStanzaReceived, 1 + user_alpha(2)}]).
  154: 
  155: one_iq_sent(Config) ->
  156:     instrumented_story
  157:       (Config, metrics_helper:userspec(1, Config),
  158:        fun(User1) ->
  159:                RosterIq = escalus_stanza:roster_get(),
  160:                escalus_client:send(User1, RosterIq),
  161:                escalus_client:wait_for_stanza(User1)
  162:         end,
  163:        [{xmppIqSent, 1},
  164:         {xmppIqReceived, 1},
  165:         {modRosterGets, 1},
  166:         {xmppStanzaSent, 1 + user_alpha(1)},
  167:         {xmppStanzaReceived, 1 + user_alpha(1)}]).
  168: 
  169: one_message_error(Config) ->
  170:     instrumented_story
  171:       (Config, metrics_helper:userspec(1, Config),
  172:        fun(User1) ->
  173:                Chat = escalus_stanza:chat_to
  174:                         (<<"nobody@", (domain())/binary>>, <<"Hi!">>),
  175:                escalus_client:send(User1, Chat),
  176:                escalus_client:wait_for_stanza(User1)
  177:         end,
  178:        [{xmppErrorTotal, 1},
  179:         {xmppErrorIq, 0},
  180:         {xmppErrorMessage, 1},
  181:         {xmppErrorPresence, 0}]).
  182: 
  183: one_iq_error(Config) ->
  184:     instrumented_story
  185:       (Config, metrics_helper:userspec(1, Config),
  186:        fun(User1) ->
  187:                BadIQ = escalus_stanza:iq_set(<<"BadNS">>, []),
  188:                escalus_client:send(User1, BadIQ),
  189:                escalus_client:wait_for_stanza(User1)
  190:         end,
  191:        [{xmppErrorTotal, 1},
  192:         {xmppErrorIq, 1},
  193:         {xmppErrorMessage, 0},
  194:         {xmppErrorPresence, 0}]).
  195: 
  196: one_presence_error(Config) ->
  197:     instrumented_story
  198:       (Config, metrics_helper:userspec(1, Config),
  199:        fun(User1) ->
  200:                BadPres = escalus_stanza:presence_direct
  201:                            (<<(domain())/binary, "/no-such-resource">>, <<"subscribed">>, []),
  202:                escalus_client:send(User1, BadPres),
  203:                escalus_client:wait_for_stanza(User1)
  204:         end,
  205:        [{xmppErrorTotal, 1},
  206:         {xmppErrorIq, 0},
  207:         {xmppErrorMessage, 0},
  208:         {xmppErrorPresence, 1}]).
  209: 
  210: session_counters(Config) ->
  211:     escalus:story
  212:       (Config, [{alice, 2}, {bob, 1}],
  213:        fun(_User11, _User12, _User2) ->
  214:                ?assertEqual(3, fetch_global_gauge_value(totalSessionCount, Config)),
  215:                ?assertEqual(2, fetch_global_gauge_value(uniqueSessionCount, Config)),
  216:                ?assertEqual(3, fetch_global_gauge_value(nodeSessionCount, Config))
  217:        end).
  218: 
  219: node_uptime(Config) ->
  220:       X = fetch_global_incrementing_gauge_value(nodeUpTime, Config),
  221:       timer:sleep(timer:seconds(1)),
  222:       Y = fetch_global_incrementing_gauge_value(nodeUpTime, Config),
  223:       ?assertEqual(true, Y > X, [{counter, nodeUpTime}, {first, X}, {second, Y}]).
  224: 
  225: cluster_size(Config) ->
  226:       SingleNodeClusterState =
  227:             fetch_global_incrementing_gauge_value(clusterSize, Config),
  228:       ?assertEqual(1, SingleNodeClusterState),
  229: 
  230:       distributed_helper:add_node_to_cluster(Config),
  231:       TwoNodesClusterState =
  232:             fetch_global_incrementing_gauge_value(clusterSize, Config),
  233:       ?assertEqual(2, TwoNodesClusterState),
  234: 
  235:       distributed_helper:remove_node_from_cluster(Config),
  236:       SingleNodeClusterState2 =
  237:             fetch_global_incrementing_gauge_value(clusterSize, Config),
  238:       ?assertEqual(1, SingleNodeClusterState2).
  239: 
  240: %%--------------------------------------------------------------------
  241: %% Helpers
  242: %%--------------------------------------------------------------------
  243: 
  244: metrics_only_global(_Config) ->
  245:     Port = ct:get_config({hosts, mim2, metrics_rest_port}),
  246:     % 0. GET is the only implemented allowed method
  247:     % (both OPTIONS and HEAD are for free then)
  248:     Res = simple_request(<<"OPTIONS">>, "/metrics/", Port),
  249:     {_S, H, _B} = Res,
  250:     assert_status(200, Res),
  251:     V = proplists:get_value(<<"allow">>, H),
  252:     Opts = string:split(V, ", ", all),
  253:     ?assertEqual([<<"GET">>,<<"HEAD">>,<<"OPTIONS">>], lists:sort(Opts)),
  254: 
  255:     % List of host types and metrics
  256:     Res2 = simple_request(<<"GET">>, "/metrics/", Port),
  257:     {_S2, _H2, B2} = Res2,
  258:     assert_status(200, Res2),
  259:     #{<<"host_types">> := [_ExampleHostType | _],
  260:       <<"metrics">> := [],
  261:       <<"global">> := [ExampleGlobal | _]} = B2,
  262: 
  263:     % All global metrics
  264:     Res3 = simple_request(<<"GET">>, "/metrics/global", Port),
  265:     {_S3, _H3, B3} = Res3,
  266:     assert_status(200, Res3),
  267:     #{<<"metrics">> := _ML} = B3,
  268:     ?assertEqual(1, maps:size(B3)),
  269: 
  270:     % An example global metric
  271:     Res4 = simple_request(<<"GET">>,
  272:                           unicode:characters_to_list(["/metrics/global/", ExampleGlobal]),
  273:                           Port),
  274:     {_S4, _H4, B4} = Res4,
  275:     #{<<"metric">> := _} = B4,
  276:     ?assertEqual(1, maps:size(B4)).
  277: 
  278: metrics_msg_flow(_Config) ->
  279:     % 0. GET is the only implemented allowed method
  280:     % (both OPTIONS and HEAD are for free then)
  281:     Res = simple_request(<<"OPTIONS">>, "/metrics/", ?PORT),
  282:     {_S, H, _B} = Res,
  283:     assert_status(200, Res),
  284:     V = proplists:get_value(<<"allow">>, H),
  285:     Opts = string:split(V, ", ", all),
  286:     ?assertEqual([<<"GET">>,<<"HEAD">>,<<"OPTIONS">>], lists:sort(Opts)),
  287: 
  288:     % List of host types and metrics
  289:     Res2 = simple_request(<<"GET">>, "/metrics/", ?PORT),
  290:     {_S2, _H2, B2} = Res2,
  291:     assert_status(200, Res2),
  292:     #{<<"host_types">> := [ExampleHostType | _],
  293:       <<"metrics">> := [ExampleMetric | _],
  294:       <<"global">> := [ExampleGlobal | _]} = B2,
  295: 
  296:     % Sum of all metrics
  297:     Res3 = simple_request(<<"GET">>, "/metrics/all", ?PORT),
  298:     {_S3, _H3, B3} = Res3,
  299:     assert_status(200, Res3),
  300:     #{<<"metrics">> := _ML} = B3,
  301:     ?assertEqual(1, maps:size(B3)),
  302: 
  303:     % Sum for a given metric
  304:     Res4 = simple_request(<<"GET">>,
  305:                           unicode:characters_to_list(["/metrics/all/", ExampleMetric]),
  306:                           ?PORT),
  307:     {_S4, _H4, B4} = Res4,
  308:     #{<<"metric">> := #{<<"one">> := _, <<"count">> := _} = IM} = B4,
  309:     ?assertEqual(2, maps:size(IM)),
  310:     ?assertEqual(1, maps:size(B4)),
  311: 
  312:     % Negative case for a non-existent given metric
  313:     Res5 = simple_request(<<"GET">>, "/metrics/all/nonExistentMetric", ?PORT),
  314:     assert_status(404, Res5),
  315: 
  316:     % All metrics for an example host type
  317:     Res6 = simple_request(<<"GET">>,
  318:                           unicode:characters_to_list(["/metrics/host_type/", ExampleHostType]),
  319:                           ?PORT),
  320:     {_S6, _H6, B6} = Res6,
  321:     #{<<"metrics">> := _} = B6,
  322:     ?assertEqual(1, maps:size(B6)),
  323: 
  324:     % Negative case for a non-existent host type
  325:     Res7 = simple_request(<<"GET">>, "/metrics/host_type/nonExistentHostType", ?PORT),
  326:     assert_status(404, Res7),
  327: 
  328:     % An example metric for an example host type
  329:     Res8 = simple_request(<<"GET">>,
  330:                           unicode:characters_to_list(["/metrics/host_type/", ExampleHostType,
  331:                                                "/", ExampleMetric]),
  332:                           ?PORT),
  333:     {_S8, _H8, B8} = Res8,
  334:     #{<<"metric">> := #{<<"one">> := _, <<"count">> := _} = IM2} = B8,
  335:     ?assertEqual(2, maps:size(IM2)),
  336:     ?assertEqual(1, maps:size(B8)),
  337: 
  338:     % Negative case for a non-existent (host type, metric) pair
  339:     Res9 = simple_request(<<"GET">>,
  340:                           unicode:characters_to_list(["/metrics/host_type/", ExampleHostType,
  341:                                                "/nonExistentMetric"]),
  342:                           ?PORT),
  343:     assert_status(404, Res9),
  344: 
  345:     % All global metrics
  346:     Res10 = simple_request(<<"GET">>, "/metrics/global", ?PORT),
  347:     {_, _, B10} = Res10,
  348:     #{<<"metrics">> := _} = B10,
  349:     ?assertEqual(1, maps:size(B10)),
  350: 
  351:     Res11 = simple_request(<<"GET">>,
  352:                            unicode:characters_to_list(["/metrics/global/", ExampleGlobal]),
  353:                            ?PORT),
  354:     {_, _, B11} = Res11,
  355:     #{<<"metric">> := _} = B11,
  356:     ?assertEqual(1, maps:size(B11)).
  357: 
  358: user_alpha(NumberOfUsers) ->
  359:     %% This represents the overhead of logging in N users via escalus:story/3
  360:     %% For each user,
  361:     %%     xmppStanza(sent|received)
  362:     %%     and
  363:     %%     xmppPresence(sent|received)
  364:     %% will be bumped by +1 at login.
  365:     NumberOfUsers.
  366: 
  367: instrumented_story(Config, UsersSpecs, StoryFun, CounterSpecs) ->
  368:     Befores = fetch_all(Config, CounterSpecs),
  369:     StoryResult = escalus:story(Config, UsersSpecs, StoryFun),
  370:     Afters =  fetch_all(Config, CounterSpecs),
  371:     [ assert_counter_inc(Name, N, find(Name, Befores), find(Name, Afters))
  372:       || {Name, N} <- CounterSpecs ],
  373:     StoryResult.
  374: 
  375: fetch_all(Config, CounterSpecs) ->
  376:     FetchCounterFun = case metrics_helper:all_metrics_are_global(Config) of
  377:                           true -> fun fetch_global_spiral_values/2;
  378:                           _ -> fun fetch_counter_value/2
  379:                       end,
  380:     [ {Counter, FetchCounterFun(Counter, Config)}
  381:       || {Counter, _} <- CounterSpecs ].
  382: 
  383: find(CounterName, CounterList) ->
  384:     case lists:keyfind(CounterName, 1, CounterList) of
  385:         false -> error(counter_defined_incorrectly);
  386:         {CounterName, Val} -> Val end.
  387: 
  388: fetch_counter_value(Counter, _Config) ->
  389:     Metric = atom_to_binary(Counter, utf8),
  390: 
  391:     HostType = host_type(),
  392:     HostTypeName = metrics_helper:make_host_type_name(HostType),
  393: 
  394:     Result = simple_request(<<"GET">>,
  395:                             unicode:characters_to_list(["/metrics/host_type/", HostTypeName, "/", Metric]),
  396:                             ?PORT),
  397:     {_S, _H, B} = Result,
  398:     assert_status(200, Result),
  399:     #{<<"metric">> := #{<<"count">> := HostTypeValue}} = B,
  400: 
  401:     Result2 = simple_request(<<"GET">>,
  402:                              unicode:characters_to_list(["/metrics/host_type/", HostTypeName]),
  403:                              ?PORT),
  404:     {_S2, _H2, B2} = Result2,
  405:     assert_status(200, Result2),
  406:     #{<<"metrics">> := #{Metric := #{<<"count">> := HostTypeValueList}}} = B2,
  407: 
  408:     Result3 = simple_request(<<"GET">>,
  409:                              unicode:characters_to_list(["/metrics/all/", Metric]),
  410:                              ?PORT),
  411:     {_S3, _H3, B3} = Result3,
  412:     assert_status(200, Result3),
  413:     #{<<"metric">> := #{<<"count">> := TotalValue}} = B3,
  414: 
  415:     Result4 = simple_request(<<"GET">>, "/metrics/all/", ?PORT),
  416:     {_S4, _H4, B4} = Result4,
  417:     assert_status(200, Result4),
  418:     #{<<"metrics">> := #{Metric := #{<<"count">> := TotalValueList}}} = B4,
  419: 
  420:     [HostTypeValue, HostTypeValueList, TotalValue, TotalValueList].
  421: 
  422: %% @doc Fetch counter that is static.
  423: fetch_global_gauge_value(Counter, Config) ->
  424:     [Value, ValueList] = fetch_global_gauge_values(Counter, Config),
  425:     ?assertEqual(Value, ValueList, [{counter, Counter}]),
  426:     Value.
  427: 
  428: %% @doc Fetch counter that can be incremented by server between two API requests.
  429: %%
  430: %% Returns last actual value
  431: fetch_global_incrementing_gauge_value(Counter, Config) ->
  432:     [Value, ValueList] = fetch_global_gauge_values(Counter, Config),
  433:     ?assertEqual(true, Value =< ValueList, [{counter, Counter},
  434:                                                    {value, Value},
  435:                                                    {value_list, ValueList}]),
  436:     ValueList.
  437: 
  438: fetch_global_gauge_values(Counter, Config) ->
  439:     fetch_global_counter_values(<<"value">>, Counter, Config).
  440: 
  441: fetch_global_spiral_values(Counter, Config) ->
  442:     % Spirals have two values associated with the metric: "one" and "count".
  443:     % We are interested in the latter.
  444:     fetch_global_counter_values(<<"count">>, Counter, Config).
  445: 
  446: fetch_global_counter_values(MetricKey, Counter, Config) ->
  447:     Metric = atom_to_binary(Counter, utf8),
  448: 
  449:     Port = case metrics_helper:all_metrics_are_global(Config) of
  450:                true ->
  451:                    ct:get_config({hosts, mim2, metrics_rest_port});
  452:                _ -> ct:get_config({hosts, mim, metrics_rest_port})
  453:            end,
  454: 
  455:     Result = simple_request(<<"GET">>,
  456:                             unicode:characters_to_list(["/metrics/global/", Metric]),
  457:                             Port),
  458:     assert_status(200, Result),
  459:     {_S, H, B} = Result,
  460:     #{<<"metric">> := #{MetricKey := Value}} = B,
  461:     ?assertEqual(<<"application/json">>, proplists:get_value(<<"content-type">>, H)),
  462:     ?assertEqual(1, maps:size(B)),
  463: 
  464:     Result2 = simple_request(<<"GET">>,
  465:                              unicode:characters_to_list(["/metrics/global/"]),
  466:                              Port),
  467:     assert_status(200, Result2),
  468:     {_S2, H2, B2} = Result2,
  469:     ?assertEqual(<<"application/json">>, proplists:get_value(<<"content-type">>, H2)),
  470:     #{<<"metrics">> := #{Metric := #{MetricKey := ValueList}}} = B2,
  471:     ?assertEqual(1, maps:size(B2)),
  472: 
  473:     [Value, ValueList].
  474: 
  475: assert_counter_inc(Name, Inc, Counters1, Counters2) when is_list(Counters1) ->
  476:     ExpectedCounters = [Counter+Inc || Counter <- Counters1],
  477:     case ExpectedCounters == Counters2 of
  478:         false ->
  479:             ct:comment("Expected ~w, got: ~w", [ExpectedCounters, Counters2]),
  480:             error({unexpected_values, Name, get_diffs(ExpectedCounters, Counters2)});
  481:         true -> ok
  482:     end;
  483: assert_counter_inc(_Name, Inc, Counter1, Counter2) when Counter1 + Inc =:= Counter2 ->
  484:     ok.
  485: 
  486: get_diffs(L1, L2) ->
  487:     lists:zip(L1, L2).
  488: 
  489: ensure_nodes_not_clustered(Config) ->
  490:     #{node := Node1Name} = RPCNode = mim(),
  491:     Nodes1 = rpc(RPCNode, mnesia, system_info, [running_db_nodes]),
  492: 
  493:     Nodes = [Node || Node <- Nodes1, Node =/= Node1Name],
  494:     [distributed_helper:remove_node_from_cluster(#{node => N}, Config) || N <- Nodes],
  495:     Config ++ [{nodes_clustered, Nodes}].
  496: 
  497: ensure_nodes_clustered(Config) ->
  498:     NodesToBeClustered = proplists:get_value(nodes_clustered, Config),
  499:     [distributed_helper:add_node_to_cluster(N, Config)
  500:      || N <- NodesToBeClustered],
  501:     Config.