1: %%==============================================================================
    2: %% Copyright 2020 Erlang Solutions Ltd.
    3: %%
    4: %% Licensed under the Apache License, Version 2.0 (the "License");
    5: %% you may not use this file except in compliance with the License.
    6: %% You may obtain a copy of the License at
    7: %%
    8: %% http://www.apache.org/licenses/LICENSE-2.0
    9: %%
   10: %% Unless required by applicable law or agreed to in writing, software
   11: %% distributed under the License is distributed on an "AS IS" BASIS,
   12: %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   13: %% See the License for the specific language governing permissions and
   14: %% limitations under the License.
   15: %%==============================================================================
   16: -module(metrics_api_SUITE).
   17: -compile([export_all, nowarn_export_all]).
   18: 
   19: -include_lib("common_test/include/ct.hrl").
   20: 
   21: -import(distributed_helper, [mim/0, rpc/4]).
   22: -import(rest_helper, [assert_status/2, simple_request/2, simple_request/3, simple_request/4]).
   23: -define(PORT, (ct:get_config({hosts, mim, metrics_rest_port}))).
   24: 
   25: -include_lib("eunit/include/eunit.hrl").
   26: 
   27: -import(domain_helper, [host_type/0, domain/0]).
   28: 
   29: %%--------------------------------------------------------------------
   30: %% Suite configuration
   31: %%--------------------------------------------------------------------
   32: all() ->
   33:     [
   34:      {group, metrics},
   35:      {group, all_metrics_are_global},
   36:      {group, global}
   37:     ].
   38: 
   39: -define(METRICS_CASES, [
   40:                         message_flow,
   41:                         one_client_just_logs_in,
   42:                         two_clients_just_log_in,
   43:                         one_message_sent,
   44:                         one_direct_presence_sent,
   45:                         one_iq_sent,
   46:                         one_message_error,
   47:                         one_iq_error,
   48:                         one_presence_error
   49:                        ]).
   50: 
   51: groups() ->
   52:     [
   53:      {metrics, [], ?METRICS_CASES},
   54:      {all_metrics_are_global, [], ?METRICS_CASES},
   55:      {global, [], [session_counters,
   56:                    node_uptime,
   57:                    cluster_size
   58:                   ]}
   59:     ].
   60: 
   61: init_per_suite(Config) ->
   62:     HostType = host_type(),
   63:     Config1 = dynamic_modules:save_modules(HostType, Config),
   64:     dynamic_modules:ensure_stopped(HostType, [mod_offline]),
   65:     escalus:init_per_suite(Config1).
   66: 
   67: end_per_suite(Config) ->
   68:     dynamic_modules:restore_modules(Config),
   69:     escalus:end_per_suite(Config).
   70: 
   71: init_per_group(GroupName, Config) ->
   72:     metrics_helper:prepare_by_all_metrics_are_global(Config, GroupName =:= all_metrics_are_global).
   73: 
   74: end_per_group(GroupName, Config) ->
   75:     metrics_helper:finalise_by_all_metrics_are_global(Config, GroupName =:= all_metrics_are_global).
   76: 
   77: init_per_testcase(cluster_size = CN, Config) ->
   78:     Config1 = ensure_nodes_not_clustered(Config),
   79:     escalus:init_per_testcase(CN, Config1);
   80: init_per_testcase(CaseName, Config) ->
   81:     escalus:init_per_testcase(CaseName, Config).
   82: 
   83: end_per_testcase(cluster_size = CN, Config) ->
   84:     Config1 = ensure_nodes_clustered(Config),
   85:     escalus:end_per_testcase(CN, Config1);
   86: end_per_testcase(CaseName, Config) ->
   87:     escalus:end_per_testcase(CaseName, Config).
   88: 
   89: %%--------------------------------------------------------------------
   90: %% metrics_api tests
   91: %%--------------------------------------------------------------------
   92: 
   93: message_flow(Config) ->
   94:     case metrics_helper:all_metrics_are_global(Config) of
   95:         true -> metrics_only_global(Config);
   96:         _ -> metrics_msg_flow(Config)
   97:     end.
   98: 
   99: one_client_just_logs_in(Config) ->
  100:     instrumented_story
  101:         (Config, metrics_helper:userspec(1, Config),
  102:          fun(_User1) -> end_of_story end,
  103:          %% A list of metrics and their expected relative increase
  104:          [{xmppIqSent, 0},
  105:           {xmppIqReceived, 0},
  106:           {xmppMessageSent, 0},
  107:           {xmppMessageReceived, 0},
  108:           {xmppPresenceSent, 0 + user_alpha(1)},
  109:           {xmppPresenceReceived, 0 + user_alpha(1)},
  110:           {xmppStanzaSent, 0 + user_alpha(1)},
  111:           {xmppStanzaReceived, 0 + user_alpha(1)},
  112:           {sessionSuccessfulLogins, 0 + user_alpha(1)},
  113:           {sessionLogouts, 0 + user_alpha(1)}
  114:          ]).
  115: 
  116: two_clients_just_log_in(Config) ->
  117:     instrumented_story
  118:         (Config, metrics_helper:userspec(1, 1, Config),
  119:          fun(_User1, _User2) -> end_of_story end,
  120:          [{xmppMessageSent, 0},
  121:           {xmppMessageReceived, 0},
  122:           {xmppStanzaSent, 0 + user_alpha(2)},
  123:           {xmppStanzaReceived, 0 + user_alpha(2)},
  124:           {xmppPresenceSent, 0 + user_alpha(2)},
  125:           {xmppPresenceReceived, 0 + user_alpha(2)},
  126:           {sessionSuccessfulLogins, 0 + user_alpha(2)},
  127:           {sessionLogouts, 0 + user_alpha(2)}
  128:          ]).
  129: 
  130: one_message_sent(Config) ->
  131:     instrumented_story
  132:       (Config, metrics_helper:userspec(1, 1, Config),
  133:        fun(User1, User2) ->
  134:                Chat = escalus_stanza:chat_to(User2, <<"Hi!">>),
  135:                escalus_client:send(User1, Chat),
  136:                escalus_client:wait_for_stanza(User2)
  137:        end,
  138:        [{xmppMessageSent,     1},
  139:         {xmppMessageReceived, 1}]).
  140: 
  141: one_direct_presence_sent(Config) ->
  142:     Userspec = metrics_helper:userspec(1, 1, Config),
  143:     instrumented_story
  144:       (Config, Userspec,
  145:        fun(User1, User2) ->
  146:                Presence = escalus_stanza:presence_direct(User2, <<"available">>),
  147:                escalus:send(User1, Presence),
  148:                escalus:wait_for_stanza(User2)
  149:         end,
  150:        [{xmppPresenceSent, 1 + user_alpha(2)},
  151:         {xmppPresenceReceived, 1 + user_alpha(2)},
  152:         {xmppStanzaSent, 1 + user_alpha(2)},
  153:         {xmppStanzaReceived, 1 + user_alpha(2)}]).
  154: 
  155: one_iq_sent(Config) ->
  156:     instrumented_story
  157:       (Config, metrics_helper:userspec(1, Config),
  158:        fun(User1) ->
  159:                RosterIq = escalus_stanza:roster_get(),
  160:                escalus_client:send(User1, RosterIq),
  161:                escalus_client:wait_for_stanza(User1)
  162:         end,
  163:        [{xmppIqSent, 1},
  164:         {xmppIqReceived, 1},
  165:         {modRosterGets, 1},
  166:         {xmppStanzaSent, 1 + user_alpha(1)},
  167:         {xmppStanzaReceived, 1 + user_alpha(1)}]).
  168: 
  169: one_message_error(Config) ->
  170:     instrumented_story
  171:       (Config, metrics_helper:userspec(1, Config),
  172:        fun(User1) ->
  173:                Chat = escalus_stanza:chat_to
  174:                         (<<"nobody@", (domain())/binary>>, <<"Hi!">>),
  175:                escalus_client:send(User1, Chat),
  176:                escalus_client:wait_for_stanza(User1)
  177:         end,
  178:        [{xmppErrorTotal, 1},
  179:         {xmppErrorIq, 0},
  180:         {xmppErrorMessage, 1},
  181:         {xmppErrorPresence, 0}]).
  182: 
  183: one_iq_error(Config) ->
  184:     instrumented_story
  185:       (Config, metrics_helper:userspec(1, Config),
  186:        fun(User1) ->
  187:                BadIQ = escalus_stanza:iq_set(<<"BadNS">>, []),
  188:                escalus_client:send(User1, BadIQ),
  189:                escalus_client:wait_for_stanza(User1)
  190:         end,
  191:        [{xmppErrorTotal, 1},
  192:         {xmppErrorIq, 1},
  193:         {xmppErrorMessage, 0},
  194:         {xmppErrorPresence, 0}]).
  195: 
  196: one_presence_error(Config) ->
  197:     instrumented_story
  198:       (Config, metrics_helper:userspec(1, Config),
  199:        fun(User1) ->
  200:                BadPres = escalus_stanza:presence_direct
  201:                            (<<(domain())/binary, "/no-such-resource">>, <<"subscribed">>, []),
  202:                escalus_client:send(User1, BadPres),
  203:                escalus_client:wait_for_stanza(User1)
  204:         end,
  205:        [{xmppErrorTotal, 1},
  206:         {xmppErrorIq, 0},
  207:         {xmppErrorMessage, 0},
  208:         {xmppErrorPresence, 1}]).
  209: 
  210: session_counters(Config) ->
  211:     Names = [totalSessionCount, uniqueSessionCount, nodeSessionCount],
  212:     escalus:story
  213:       (Config, [{alice, 2}, {bob, 1}],
  214:        fun(_User11, _User12, _User2) ->
  215:                %% Force update
  216:                lists:foreach(fun metrics_helper:sample/1, Names),
  217:                ?assertEqual(3, fetch_global_gauge_value(totalSessionCount, Config)),
  218:                ?assertEqual(2, fetch_global_gauge_value(uniqueSessionCount, Config)),
  219:                ?assertEqual(3, fetch_global_gauge_value(nodeSessionCount, Config))
  220:        end).
  221: 
  222: node_uptime(Config) ->
  223:       X = fetch_global_incrementing_gauge_value(nodeUpTime, Config),
  224:       timer:sleep(timer:seconds(1)),
  225:       Y = fetch_global_incrementing_gauge_value(nodeUpTime, Config),
  226:       ?assertEqual(true, Y > X, [{counter, nodeUpTime}, {first, X}, {second, Y}]).
  227: 
  228: cluster_size(Config) ->
  229:       SingleNodeClusterState =
  230:             fetch_global_incrementing_gauge_value(clusterSize, Config),
  231:       ?assertEqual(1, SingleNodeClusterState),
  232: 
  233:       distributed_helper:add_node_to_cluster(Config),
  234:       TwoNodesClusterState =
  235:             fetch_global_incrementing_gauge_value(clusterSize, Config),
  236:       ?assertEqual(2, TwoNodesClusterState),
  237: 
  238:       distributed_helper:remove_node_from_cluster(Config),
  239:       SingleNodeClusterState2 =
  240:             fetch_global_incrementing_gauge_value(clusterSize, Config),
  241:       ?assertEqual(1, SingleNodeClusterState2).
  242: 
  243: %%--------------------------------------------------------------------
  244: %% Helpers
  245: %%--------------------------------------------------------------------
  246: 
  247: metrics_only_global(_Config) ->
  248:     Port = ct:get_config({hosts, mim2, metrics_rest_port}),
  249:     % 0. GET is the only implemented allowed method
  250:     % (both OPTIONS and HEAD are for free then)
  251:     Res = simple_request(<<"OPTIONS">>, "/metrics/", Port),
  252:     {_S, H, _B} = Res,
  253:     assert_status(200, Res),
  254:     V = proplists:get_value(<<"allow">>, H),
  255:     Opts = string:split(V, ", ", all),
  256:     ?assertEqual([<<"GET">>,<<"HEAD">>,<<"OPTIONS">>], lists:sort(Opts)),
  257: 
  258:     % List of host types and metrics
  259:     Res2 = simple_request(<<"GET">>, "/metrics/", Port),
  260:     {_S2, _H2, B2} = Res2,
  261:     assert_status(200, Res2),
  262:     #{<<"host_types">> := [_ExampleHostType | _],
  263:       <<"metrics">> := [],
  264:       <<"global">> := [ExampleGlobal | _]} = B2,
  265: 
  266:     % All global metrics
  267:     Res3 = simple_request(<<"GET">>, "/metrics/global", Port),
  268:     {_S3, _H3, B3} = Res3,
  269:     assert_status(200, Res3),
  270:     #{<<"metrics">> := _ML} = B3,
  271:     ?assertEqual(1, maps:size(B3)),
  272: 
  273:     % An example global metric
  274:     Res4 = simple_request(<<"GET">>,
  275:                           unicode:characters_to_list(["/metrics/global/", ExampleGlobal]),
  276:                           Port),
  277:     {_S4, _H4, B4} = Res4,
  278:     #{<<"metric">> := _} = B4,
  279:     ?assertEqual(1, maps:size(B4)).
  280: 
  281: metrics_msg_flow(_Config) ->
  282:     % 0. GET is the only implemented allowed method
  283:     % (both OPTIONS and HEAD are for free then)
  284:     Res = simple_request(<<"OPTIONS">>, "/metrics/", ?PORT),
  285:     {_S, H, _B} = Res,
  286:     assert_status(200, Res),
  287:     V = proplists:get_value(<<"allow">>, H),
  288:     Opts = string:split(V, ", ", all),
  289:     ?assertEqual([<<"GET">>,<<"HEAD">>,<<"OPTIONS">>], lists:sort(Opts)),
  290: 
  291:     % List of host types and metrics
  292:     Res2 = simple_request(<<"GET">>, "/metrics/", ?PORT),
  293:     {_S2, _H2, B2} = Res2,
  294:     assert_status(200, Res2),
  295:     #{<<"host_types">> := [ExampleHostType | _],
  296:       <<"metrics">> := [ExampleMetric | _],
  297:       <<"global">> := [ExampleGlobal | _]} = B2,
  298: 
  299:     % Sum of all metrics
  300:     Res3 = simple_request(<<"GET">>, "/metrics/all", ?PORT),
  301:     {_S3, _H3, B3} = Res3,
  302:     assert_status(200, Res3),
  303:     #{<<"metrics">> := _ML} = B3,
  304:     ?assertEqual(1, maps:size(B3)),
  305: 
  306:     % Sum for a given metric
  307:     Res4 = simple_request(<<"GET">>,
  308:                           unicode:characters_to_list(["/metrics/all/", ExampleMetric]),
  309:                           ?PORT),
  310:     {_S4, _H4, B4} = Res4,
  311:     #{<<"metric">> := #{<<"one">> := _, <<"count">> := _} = IM} = B4,
  312:     ?assertEqual(2, maps:size(IM)),
  313:     ?assertEqual(1, maps:size(B4)),
  314: 
  315:     % Negative case for a non-existent given metric
  316:     Res5 = simple_request(<<"GET">>, "/metrics/all/nonExistentMetric", ?PORT),
  317:     assert_status(404, Res5),
  318: 
  319:     % All metrics for an example host type
  320:     Res6 = simple_request(<<"GET">>,
  321:                           unicode:characters_to_list(["/metrics/host_type/", ExampleHostType]),
  322:                           ?PORT),
  323:     {_S6, _H6, B6} = Res6,
  324:     #{<<"metrics">> := _} = B6,
  325:     ?assertEqual(1, maps:size(B6)),
  326: 
  327:     % Negative case for a non-existent host type
  328:     Res7 = simple_request(<<"GET">>, "/metrics/host_type/nonExistentHostType", ?PORT),
  329:     assert_status(404, Res7),
  330: 
  331:     % An example metric for an example host type
  332:     Res8 = simple_request(<<"GET">>,
  333:                           unicode:characters_to_list(["/metrics/host_type/", ExampleHostType,
  334:                                                "/", ExampleMetric]),
  335:                           ?PORT),
  336:     {_S8, _H8, B8} = Res8,
  337:     #{<<"metric">> := #{<<"one">> := _, <<"count">> := _} = IM2} = B8,
  338:     ?assertEqual(2, maps:size(IM2)),
  339:     ?assertEqual(1, maps:size(B8)),
  340: 
  341:     % Negative case for a non-existent (host type, metric) pair
  342:     Res9 = simple_request(<<"GET">>,
  343:                           unicode:characters_to_list(["/metrics/host_type/", ExampleHostType,
  344:                                                "/nonExistentMetric"]),
  345:                           ?PORT),
  346:     assert_status(404, Res9),
  347: 
  348:     % All global metrics
  349:     Res10 = simple_request(<<"GET">>, "/metrics/global", ?PORT),
  350:     {_, _, B10} = Res10,
  351:     #{<<"metrics">> := _} = B10,
  352:     ?assertEqual(1, maps:size(B10)),
  353: 
  354:     Res11 = simple_request(<<"GET">>,
  355:                            unicode:characters_to_list(["/metrics/global/", ExampleGlobal]),
  356:                            ?PORT),
  357:     {_, _, B11} = Res11,
  358:     #{<<"metric">> := _} = B11,
  359:     ?assertEqual(1, maps:size(B11)).
  360: 
  361: user_alpha(NumberOfUsers) ->
  362:     %% This represents the overhead of logging in N users via escalus:story/3
  363:     %% For each user,
  364:     %%     xmppStanza(sent|received)
  365:     %%     and
  366:     %%     xmppPresence(sent|received)
  367:     %% will be bumped by +1 at login.
  368:     NumberOfUsers.
  369: 
  370: instrumented_story(Config, UsersSpecs, StoryFun, CounterSpecs) ->
  371:     Befores = fetch_all(Config, CounterSpecs),
  372:     StoryResult = escalus:story(Config, UsersSpecs, StoryFun),
  373:     Afters =  fetch_all(Config, CounterSpecs),
  374:     [ assert_counter_inc(Name, N, find(Name, Befores), find(Name, Afters))
  375:       || {Name, N} <- CounterSpecs ],
  376:     StoryResult.
  377: 
  378: fetch_all(Config, CounterSpecs) ->
  379:     FetchCounterFun = case metrics_helper:all_metrics_are_global(Config) of
  380:                           true -> fun fetch_global_spiral_values/2;
  381:                           _ -> fun fetch_counter_value/2
  382:                       end,
  383:     [ {Counter, FetchCounterFun(Counter, Config)}
  384:       || {Counter, _} <- CounterSpecs ].
  385: 
  386: find(CounterName, CounterList) ->
  387:     case lists:keyfind(CounterName, 1, CounterList) of
  388:         false -> error(counter_defined_incorrectly);
  389:         {CounterName, Val} -> Val end.
  390: 
  391: fetch_counter_value(Counter, _Config) ->
  392:     Metric = atom_to_binary(Counter, utf8),
  393: 
  394:     HostType = host_type(),
  395:     HostTypeName = metrics_helper:make_host_type_name(HostType),
  396: 
  397:     Result = simple_request(<<"GET">>,
  398:                             unicode:characters_to_list(["/metrics/host_type/", HostTypeName, "/", Metric]),
  399:                             ?PORT),
  400:     {_S, _H, B} = Result,
  401:     assert_status(200, Result),
  402:     #{<<"metric">> := #{<<"count">> := HostTypeValue}} = B,
  403: 
  404:     Result2 = simple_request(<<"GET">>,
  405:                              unicode:characters_to_list(["/metrics/host_type/", HostTypeName]),
  406:                              ?PORT),
  407:     {_S2, _H2, B2} = Result2,
  408:     assert_status(200, Result2),
  409:     #{<<"metrics">> := #{Metric := #{<<"count">> := HostTypeValueList}}} = B2,
  410: 
  411:     Result3 = simple_request(<<"GET">>,
  412:                              unicode:characters_to_list(["/metrics/all/", Metric]),
  413:                              ?PORT),
  414:     {_S3, _H3, B3} = Result3,
  415:     assert_status(200, Result3),
  416:     #{<<"metric">> := #{<<"count">> := TotalValue}} = B3,
  417: 
  418:     Result4 = simple_request(<<"GET">>, "/metrics/all/", ?PORT),
  419:     {_S4, _H4, B4} = Result4,
  420:     assert_status(200, Result4),
  421:     #{<<"metrics">> := #{Metric := #{<<"count">> := TotalValueList}}} = B4,
  422: 
  423:     [HostTypeValue, HostTypeValueList, TotalValue, TotalValueList].
  424: 
  425: %% @doc Fetch counter that is static.
  426: fetch_global_gauge_value(Counter, Config) ->
  427:     [Value, ValueList] = fetch_global_gauge_values(Counter, Config),
  428:     ?assertEqual(Value, ValueList, [{counter, Counter}]),
  429:     Value.
  430: 
  431: %% @doc Fetch counter that can be incremented by server between two API requests.
  432: %%
  433: %% Returns last actual value
  434: fetch_global_incrementing_gauge_value(Counter, Config) ->
  435:     [Value, ValueList] = fetch_global_gauge_values(Counter, Config),
  436:     ?assertEqual(true, Value =< ValueList, [{counter, Counter},
  437:                                                    {value, Value},
  438:                                                    {value_list, ValueList}]),
  439:     ValueList.
  440: 
  441: fetch_global_gauge_values(Counter, Config) ->
  442:     fetch_global_counter_values(<<"value">>, Counter, Config).
  443: 
  444: fetch_global_spiral_values(Counter, Config) ->
  445:     % Spirals have two values associated with the metric: "one" and "count".
  446:     % We are interested in the latter.
  447:     fetch_global_counter_values(<<"count">>, Counter, Config).
  448: 
  449: fetch_global_counter_values(MetricKey, Counter, Config) ->
  450:     Metric = atom_to_binary(Counter, utf8),
  451: 
  452:     Port = case metrics_helper:all_metrics_are_global(Config) of
  453:                true ->
  454:                    ct:get_config({hosts, mim2, metrics_rest_port});
  455:                _ -> ct:get_config({hosts, mim, metrics_rest_port})
  456:            end,
  457: 
  458:     Result = simple_request(<<"GET">>,
  459:                             unicode:characters_to_list(["/metrics/global/", Metric]),
  460:                             Port),
  461:     assert_status(200, Result),
  462:     {_S, H, B} = Result,
  463:     #{<<"metric">> := #{MetricKey := Value}} = B,
  464:     ?assertEqual(<<"application/json">>, proplists:get_value(<<"content-type">>, H)),
  465:     ?assertEqual(1, maps:size(B)),
  466: 
  467:     Result2 = simple_request(<<"GET">>,
  468:                              unicode:characters_to_list(["/metrics/global/"]),
  469:                              Port),
  470:     assert_status(200, Result2),
  471:     {_S2, H2, B2} = Result2,
  472:     ?assertEqual(<<"application/json">>, proplists:get_value(<<"content-type">>, H2)),
  473:     #{<<"metrics">> := #{Metric := #{MetricKey := ValueList}}} = B2,
  474:     ?assertEqual(1, maps:size(B2)),
  475: 
  476:     [Value, ValueList].
  477: 
  478: assert_counter_inc(Name, Inc, Counters1, Counters2) when is_list(Counters1) ->
  479:     ExpectedCounters = [Counter+Inc || Counter <- Counters1],
  480:     case ExpectedCounters == Counters2 of
  481:         false ->
  482:             ct:comment("Expected ~w, got: ~w", [ExpectedCounters, Counters2]),
  483:             error({unexpected_values, Name, get_diffs(ExpectedCounters, Counters2)});
  484:         true -> ok
  485:     end;
  486: assert_counter_inc(_Name, Inc, Counter1, Counter2) when Counter1 + Inc =:= Counter2 ->
  487:     ok.
  488: 
  489: get_diffs(L1, L2) ->
  490:     lists:zip(L1, L2).
  491: 
  492: ensure_nodes_not_clustered(Config) ->
  493:     #{node := Node1Name} = RPCNode = mim(),
  494:     Nodes1 = rpc(RPCNode, mnesia, system_info, [running_db_nodes]),
  495: 
  496:     Nodes = [Node || Node <- Nodes1, Node =/= Node1Name],
  497:     [distributed_helper:remove_node_from_cluster(#{node => N}, Config) || N <- Nodes],
  498:     Config ++ [{nodes_clustered, Nodes}].
  499: 
  500: ensure_nodes_clustered(Config) ->
  501:     NodesToBeClustered = proplists:get_value(nodes_clustered, Config),
  502:     [distributed_helper:add_node_to_cluster(N, Config)
  503:      || N <- NodesToBeClustered],
  504:     Config.