1: %%==============================================================================
    2: %% Copyright 2020 Erlang Solutions Ltd.
    3: %%
    4: %% Licensed under the Apache License, Version 2.0 (the "License");
    5: %% you may not use this file except in compliance with the License.
    6: %% You may obtain a copy of the License at
    7: %%
    8: %% http://www.apache.org/licenses/LICENSE-2.0
    9: %%
   10: %% Unless required by applicable law or agreed to in writing, software
   11: %% distributed under the License is distributed on an "AS IS" BASIS,
   12: %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   13: %% See the License for the specific language governing permissions and
   14: %% limitations under the License.
   15: %%==============================================================================
   16: -module(metrics_api_SUITE).
   17: -compile([export_all, nowarn_export_all]).
   18: 
   19: -import(distributed_helper, [mim/0, mim2/0, rpc/4]).
   20: -import(rest_helper, [assert_status/2, make_request/1]).
   21: 
   22: -include_lib("eunit/include/eunit.hrl").
   23: 
   24: -import(domain_helper, [host_type/0, domain/0]).
   25: 
   26: %%--------------------------------------------------------------------
   27: %% Suite configuration
   28: %%--------------------------------------------------------------------
   29: all() ->
   30:     [
   31:      {group, metrics},
   32:      {group, all_metrics_are_global},
   33:      {group, global}
   34:     ].
   35: 
   36: -define(METRICS_CASES, [
   37:                         message_flow,
   38:                         one_client_just_logs_in,
   39:                         two_clients_just_log_in,
   40:                         one_message_sent,
   41:                         one_direct_presence_sent,
   42:                         one_iq_sent,
   43:                         one_message_error,
   44:                         one_iq_error,
   45:                         one_presence_error
   46:                        ]).
   47: 
   48: groups() ->
   49:     [
   50:      {metrics, [], [non_existent_metrics | ?METRICS_CASES]},
   51:      {all_metrics_are_global, [], ?METRICS_CASES},
   52:      {global, [], [session_counters,
   53:                    node_uptime,
   54:                    cluster_size]}
   55:     ].
   56: 
   57: init_per_suite(Config) ->
   58:     HostType = host_type(),
   59:     Config1 = dynamic_modules:save_modules(HostType, Config),
   60:     dynamic_modules:ensure_stopped(HostType, [mod_offline]),
   61:     escalus:init_per_suite(Config1).
   62: 
   63: end_per_suite(Config) ->
   64:     dynamic_modules:restore_modules(Config),
   65:     escalus:end_per_suite(Config).
   66: 
   67: init_per_group(GroupName, Config) ->
   68:     metrics_helper:prepare_by_all_metrics_are_global(Config, GroupName =:= all_metrics_are_global).
   69: 
   70: end_per_group(GroupName, Config) ->
   71:     metrics_helper:finalise_by_all_metrics_are_global(Config, GroupName =:= all_metrics_are_global).
   72: 
   73: init_per_testcase(cluster_size = CN, Config) ->
   74:     case distributed_helper:has_mnesia(mim()) of
   75:         true ->
   76:             Config1 = ensure_nodes_not_clustered(Config),
   77:             escalus:init_per_testcase(CN, Config1);
   78:         false ->
   79:             {skip, "Requires Mnesia"}
   80:     end;
   81: init_per_testcase(CaseName, Config) ->
   82:     escalus:init_per_testcase(CaseName, Config).
   83: 
   84: end_per_testcase(cluster_size = CN, Config) ->
   85:     Config1 = ensure_nodes_clustered(Config),
   86:     escalus:end_per_testcase(CN, Config1);
   87: end_per_testcase(CaseName, Config) ->
   88:     escalus:end_per_testcase(CaseName, Config).
   89: 
   90: %%--------------------------------------------------------------------
   91: %% metrics_api tests
   92: %%--------------------------------------------------------------------
   93: 
   94: non_existent_metrics(_Config) ->
   95:     IncompleteName = "backends",
   96:     GlobalMetricName = "adhoc_local_commands",
   97:     HostType = metrics_helper:make_host_type_name(host_type()),
   98:     assert_status(404, request(<<"GET">>, "/metrics/all/" ++ IncompleteName)),
   99:     assert_status(404, request(<<"GET">>, "/metrics/all/badMetric")),
  100:     assert_status(404, request(<<"GET">>, "/metrics/global/" ++ IncompleteName)),
  101:     assert_status(404, request(<<"GET">>, "/metrics/global/badMetric")),
  102:     assert_status(404, request(<<"GET">>, "/metrics/host_type/badHostType")),
  103:     assert_status(404, request(<<"GET">>, "/metrics/host_type/badHostType/xmppStanzaCount")),
  104:     assert_status(404, request(<<"GET">>, ["/metrics/", HostType, "/", GlobalMetricName])),
  105:     assert_status(404, request(<<"GET">>, ["/metrics/", HostType, "/badMetric"])).
  106: 
  107: message_flow(Config) ->
  108:     case metrics_helper:all_metrics_are_global(Config) of
  109:         true -> metrics_only_global(Config);
  110:         _ -> metrics_msg_flow(Config)
  111:     end.
  112: 
  113: one_client_just_logs_in(Config) ->
  114:     instrumented_story
  115:         (Config, metrics_helper:userspec(1, Config),
  116:          fun(_User1) -> end_of_story end,
  117:          %% A list of metrics and their expected relative increase
  118:          [{xmppIqSent, 0 + user_alpha(2)},
  119:           {xmppIqReceived, 0 + user_alpha(2)},
  120:           {xmppMessageSent, 0},
  121:           {xmppMessageReceived, 0},
  122:           {xmppPresenceSent, 0 + user_alpha(1)},
  123:           {xmppPresenceReceived, 0 + user_alpha(1)},
  124:           {xmppStanzaSent, 0 + user_alpha(3)},
  125:           {xmppStanzaReceived, 0 + user_alpha(3)},
  126:           {sessionSuccessfulLogins, 0 + user_alpha(1)},
  127:           {sessionLogouts, 0 + user_alpha(1)}
  128:          ]).
  129: 
  130: two_clients_just_log_in(Config) ->
  131:     instrumented_story
  132:         (Config, metrics_helper:userspec(1, 1, Config),
  133:          fun(_User1, _User2) -> end_of_story end,
  134:          [{xmppIqSent, 0 + user_alpha(4)},
  135:           {xmppIqReceived, 0 + user_alpha(4)},
  136:           {xmppMessageSent, 0},
  137:           {xmppMessageReceived, 0},
  138:           {xmppPresenceSent, 0 + user_alpha(2)},
  139:           {xmppPresenceReceived, 0 + user_alpha(2)},
  140:           {xmppStanzaSent, 0 + user_alpha(6)},
  141:           {xmppStanzaReceived, 0 + user_alpha(6)},
  142:           {sessionSuccessfulLogins, 0 + user_alpha(2)},
  143:           {sessionLogouts, 0 + user_alpha(2)}
  144:          ]).
  145: 
  146: one_message_sent(Config) ->
  147:     instrumented_story
  148:       (Config, metrics_helper:userspec(1, 1, Config),
  149:        fun(User1, User2) ->
  150:                Chat = escalus_stanza:chat_to(User2, <<"Hi!">>),
  151:                escalus_client:send(User1, Chat),
  152:                escalus_client:wait_for_stanza(User2)
  153:        end,
  154:        [{xmppMessageSent,     1},
  155:         {xmppMessageReceived, 1}]).
  156: 
  157: one_direct_presence_sent(Config) ->
  158:     Userspec = metrics_helper:userspec(1, 1, Config),
  159:     instrumented_story
  160:       (Config, Userspec,
  161:        fun(User1, User2) ->
  162:                Presence = escalus_stanza:presence_direct(User2, <<"available">>),
  163:                escalus:send(User1, Presence),
  164:                escalus:wait_for_stanza(User2)
  165:         end,
  166:        [{xmppPresenceSent, 1 + user_alpha(2)},
  167:         {xmppPresenceReceived, 1 + user_alpha(2)},
  168:         {xmppStanzaSent, 1 + user_alpha(6)},
  169:         {xmppStanzaReceived, 1 + user_alpha(6)}]).
  170: 
  171: one_iq_sent(Config) ->
  172:     instrumented_story
  173:       (Config, metrics_helper:userspec(1, Config),
  174:        fun(User1) ->
  175:                RosterIq = escalus_stanza:roster_get(),
  176:                escalus_client:send(User1, RosterIq),
  177:                escalus_client:wait_for_stanza(User1)
  178:         end,
  179:        [{xmppIqSent, 3},
  180:         {xmppIqReceived, 3},
  181:         {modRosterGets, 1},
  182:         {xmppStanzaSent, 1 + user_alpha(3)},
  183:         {xmppStanzaReceived, 1 + user_alpha(3)}]).
  184: 
  185: one_message_error(Config) ->
  186:     instrumented_story
  187:       (Config, metrics_helper:userspec(1, Config),
  188:        fun(User1) ->
  189:                Chat = escalus_stanza:chat_to
  190:                         (<<"nobody@", (domain())/binary>>, <<"Hi!">>),
  191:                escalus_client:send(User1, Chat),
  192:                escalus_client:wait_for_stanza(User1)
  193:         end,
  194:        [{xmppErrorTotal, 1},
  195:         {xmppErrorIq, 0},
  196:         {xmppErrorMessage, 1},
  197:         {xmppErrorPresence, 0}]).
  198: 
  199: one_iq_error(Config) ->
  200:     instrumented_story
  201:       (Config, metrics_helper:userspec(1, Config),
  202:        fun(User1) ->
  203:                BadIQ = escalus_stanza:iq_set(<<"BadNS">>, []),
  204:                escalus_client:send(User1, BadIQ),
  205:                escalus_client:wait_for_stanza(User1)
  206:         end,
  207:        [{xmppErrorTotal, 1},
  208:         {xmppErrorIq, 1},
  209:         {xmppErrorMessage, 0},
  210:         {xmppErrorPresence, 0}]).
  211: 
  212: one_presence_error(Config) ->
  213:     instrumented_story
  214:       (Config, metrics_helper:userspec(1, Config),
  215:        fun(User1) ->
  216:                BadPres = escalus_stanza:presence_direct
  217:                            (<<(domain())/binary, "/no-such-resource">>, <<"subscribed">>, []),
  218:                escalus_client:send(User1, BadPres),
  219:                escalus_client:wait_for_stanza(User1)
  220:         end,
  221:        [{xmppErrorTotal, 1},
  222:         {xmppErrorIq, 0},
  223:         {xmppErrorMessage, 0},
  224:         {xmppErrorPresence, 1}]).
  225: 
  226: session_counters(Config) ->
  227:     Names = [totalSessionCount, uniqueSessionCount, nodeSessionCount],
  228:     escalus:story
  229:       (Config, [{alice, 2}, {bob, 1}],
  230:        fun(_User11, _User12, _User2) ->
  231:             %% Force update
  232:             lists:foreach(fun metrics_helper:sample/1, Names),
  233:             timer:sleep(timer:seconds(1)),
  234: 
  235:             ?assertEqual(3, fetch_global_gauge_value(totalSessionCount, Config)),
  236:             ?assertEqual(2, fetch_global_gauge_value(uniqueSessionCount, Config)),
  237:             ?assertEqual(3, fetch_global_gauge_value(nodeSessionCount, Config))
  238:        end).
  239: 
  240: node_uptime(Config) ->
  241:       X = fetch_global_incrementing_gauge_value(nodeUpTime, Config),
  242:       timer:sleep(timer:seconds(1)),
  243:       Y = fetch_global_incrementing_gauge_value(nodeUpTime, Config),
  244:       ?assertEqual(true, Y > X, [{counter, nodeUpTime}, {first, X}, {second, Y}]).
  245: 
  246: cluster_size(Config) ->
  247:       SingleNodeClusterState =
  248:             fetch_global_incrementing_gauge_value(clusterSize, Config),
  249:       ?assertEqual(1, SingleNodeClusterState),
  250: 
  251:       distributed_helper:add_node_to_cluster(Config),
  252:       TwoNodesClusterState =
  253:             fetch_global_incrementing_gauge_value(clusterSize, Config),
  254:       ?assertEqual(2, TwoNodesClusterState),
  255: 
  256:       distributed_helper:remove_node_from_cluster(Config),
  257:       SingleNodeClusterState2 =
  258:             fetch_global_incrementing_gauge_value(clusterSize, Config),
  259:       ?assertEqual(1, SingleNodeClusterState2).
  260: 
  261: %%--------------------------------------------------------------------
  262: %% Helpers
  263: %%--------------------------------------------------------------------
  264: 
  265: metrics_only_global(_Config) ->
  266:     % 0. GET is the only implemented allowed method
  267:     % (both OPTIONS and HEAD are for free then)
  268:     Res = request(<<"OPTIONS">>, "/metrics/", mim2()),
  269:     {_S, H, _B} = Res,
  270:     assert_status(200, Res),
  271:     V = proplists:get_value(<<"allow">>, H),
  272:     Opts = string:split(V, ", ", all),
  273:     ?assertEqual([<<"GET">>,<<"HEAD">>,<<"OPTIONS">>], lists:sort(Opts)),
  274: 
  275:     % List of host types and metrics
  276:     Res2 = request(<<"GET">>, "/metrics/", mim2()),
  277:     {_S2, _H2, B2} = Res2,
  278:     assert_status(200, Res2),
  279:     #{<<"host_types">> := [_ExampleHostType | _],
  280:       <<"metrics">> := [],
  281:       <<"global">> := [ExampleGlobal | _]} = B2,
  282: 
  283:     % All global metrics
  284:     Res3 = request(<<"GET">>, "/metrics/global", mim2()),
  285:     {_S3, _H3, B3} = Res3,
  286:     assert_status(200, Res3),
  287:     #{<<"metrics">> := _ML} = B3,
  288:     ?assertEqual(1, maps:size(B3)),
  289: 
  290:     % An example global metric
  291:     Res4 = request(<<"GET">>, ["/metrics/global/", ExampleGlobal], mim2()),
  292:     {_S4, _H4, B4} = Res4,
  293:     #{<<"metric">> := _} = B4,
  294:     ?assertEqual(1, maps:size(B4)).
  295: 
  296: metrics_msg_flow(_Config) ->
  297:     % 0. GET is the only implemented allowed method
  298:     % (both OPTIONS and HEAD are for free then)
  299:     Res = request(<<"OPTIONS">>, "/metrics/"),
  300:     {_S, H, _B} = Res,
  301:     assert_status(200, Res),
  302:     V = proplists:get_value(<<"allow">>, H),
  303:     Opts = string:split(V, ", ", all),
  304:     ?assertEqual([<<"GET">>,<<"HEAD">>,<<"OPTIONS">>], lists:sort(Opts)),
  305: 
  306:     % List of host types and metrics
  307:     Res2 = request(<<"GET">>, "/metrics/"),
  308:     {_S2, _H2, B2} = Res2,
  309:     assert_status(200, Res2),
  310:     #{<<"host_types">> := [ExampleHostType | _],
  311:       <<"metrics">> := [ExampleMetric | _],
  312:       <<"global">> := [ExampleGlobal | _]} = B2,
  313: 
  314:     % Sum of all metrics
  315:     Res3 = request(<<"GET">>, "/metrics/all"),
  316:     {_S3, _H3, B3} = Res3,
  317:     assert_status(200, Res3),
  318:     #{<<"metrics">> := _ML} = B3,
  319:     ?assertEqual(1, maps:size(B3)),
  320: 
  321:     % Sum for a given metric
  322:     Res4 = request(<<"GET">>, ["/metrics/all/", ExampleMetric]),
  323:     {_S4, _H4, B4} = Res4,
  324:     #{<<"metric">> := #{<<"one">> := _, <<"count">> := _} = IM} = B4,
  325:     ?assertEqual(2, maps:size(IM)),
  326:     ?assertEqual(1, maps:size(B4)),
  327: 
  328:     % All metrics for an example host type
  329:     Res6 = request(<<"GET">>, ["/metrics/host_type/", ExampleHostType]),
  330:     {_S6, _H6, B6} = Res6,
  331:     #{<<"metrics">> := _} = B6,
  332:     ?assertEqual(1, maps:size(B6)),
  333: 
  334:     % An example metric for an example host type
  335:     Res8 = request(<<"GET">>, ["/metrics/host_type/", ExampleHostType, "/", ExampleMetric]),
  336:     {_S8, _H8, B8} = Res8,
  337:     #{<<"metric">> := #{<<"one">> := _, <<"count">> := _} = IM2} = B8,
  338:     ?assertEqual(2, maps:size(IM2)),
  339:     ?assertEqual(1, maps:size(B8)),
  340: 
  341:     % All global metrics
  342:     Res10 = request(<<"GET">>, "/metrics/global"),
  343:     {_, _, B10} = Res10,
  344:     #{<<"metrics">> := _} = B10,
  345:     ?assertEqual(1, maps:size(B10)),
  346: 
  347:     Res11 = request(<<"GET">>, ["/metrics/global/", ExampleGlobal]),
  348:     {_, _, B11} = Res11,
  349:     #{<<"metric">> := _} = B11,
  350:     ?assertEqual(1, maps:size(B11)).
  351: 
  352: user_alpha(NumberOfUsers) ->
  353:     %% This represents the overhead of logging in N users via escalus:story/3
  354:     %% For each user,
  355:     %%     xmppStanza(sent|received)
  356:     %%     and
  357:     %%     xmppPresence(sent|received)
  358:     %% will be bumped by +1 at login.
  359:     NumberOfUsers.
  360: 
  361: instrumented_story(Config, UsersSpecs, StoryFun, CounterSpecs) ->
  362:     Befores = fetch_all(Config, CounterSpecs),
  363:     StoryResult = escalus:story(Config, UsersSpecs, StoryFun),
  364:     Afters = fetch_all(Config, CounterSpecs),
  365:     [ assert_counter_inc(Name, N, find(Name, Befores), find(Name, Afters))
  366:       || {Name, N} <- CounterSpecs ],
  367:     StoryResult.
  368: 
  369: fetch_all(Config, CounterSpecs) ->
  370:     FetchCounterFun = case metrics_helper:all_metrics_are_global(Config) of
  371:                           true -> fun fetch_global_spiral_values/2;
  372:                           _ -> fun fetch_counter_value/2
  373:                       end,
  374:     [ {Counter, FetchCounterFun(Counter, Config)}
  375:       || {Counter, _} <- CounterSpecs ].
  376: 
  377: find(CounterName, CounterList) ->
  378:     case lists:keyfind(CounterName, 1, CounterList) of
  379:         false -> error(counter_defined_incorrectly);
  380:         {CounterName, Val} -> Val end.
  381: 
  382: fetch_counter_value(Counter, _Config) ->
  383:     Metric = atom_to_binary(Counter, utf8),
  384: 
  385:     HostType = host_type(),
  386:     HostTypeName = metrics_helper:make_host_type_name(HostType),
  387: 
  388:     Result = request(<<"GET">>, ["/metrics/host_type/", HostTypeName, "/", Metric]),
  389:     {_S, _H, B} = Result,
  390:     assert_status(200, Result),
  391:     #{<<"metric">> := #{<<"count">> := HostTypeValue}} = B,
  392: 
  393:     Result2 = request(<<"GET">>, ["/metrics/host_type/", HostTypeName]),
  394:     {_S2, _H2, B2} = Result2,
  395:     assert_status(200, Result2),
  396:     #{<<"metrics">> := #{Metric := #{<<"count">> := HostTypeValueList}}} = B2,
  397: 
  398:     Result3 = request(<<"GET">>, ["/metrics/all/", Metric]),
  399:     {_S3, _H3, B3} = Result3,
  400:     assert_status(200, Result3),
  401:     #{<<"metric">> := #{<<"count">> := TotalValue}} = B3,
  402: 
  403:     Result4 = request(<<"GET">>, "/metrics/all/"),
  404:     {_S4, _H4, B4} = Result4,
  405:     assert_status(200, Result4),
  406:     #{<<"metrics">> := #{Metric := #{<<"count">> := TotalValueList}}} = B4,
  407: 
  408:     [HostTypeValue, HostTypeValueList, TotalValue, TotalValueList].
  409: 
  410: %% @doc Fetch counter that is static.
  411: fetch_global_gauge_value(Counter, Config) ->
  412:     [Value, ValueList] = fetch_global_gauge_values(Counter, Config),
  413:     ?assertEqual(Value, ValueList, [{counter, Counter}]),
  414:     Value.
  415: 
  416: %% @doc Fetch counter that can be incremented by server between two API requests.
  417: %%
  418: %% Returns last actual value
  419: fetch_global_incrementing_gauge_value(Counter, Config) ->
  420:     [Value, ValueList] = fetch_global_gauge_values(Counter, Config),
  421:     ?assertEqual(true, Value =< ValueList, [{counter, Counter},
  422:                                             {value, Value},
  423:                                             {value_list, ValueList}]),
  424:     ValueList.
  425: 
  426: fetch_global_gauge_values(Counter, Config) ->
  427:     fetch_global_counter_values(<<"value">>, Counter, Config).
  428: 
  429: fetch_global_spiral_values(Counter, Config) ->
  430:     % Spirals have two values associated with the metric: "one" and "count".
  431:     % We are interested in the latter.
  432:     fetch_global_counter_values(<<"count">>, Counter, Config).
  433: 
  434: fetch_global_counter_values(MetricKey, Counter, Config) ->
  435:     Metric = atom_to_binary(Counter, utf8),
  436: 
  437:     Server = case metrics_helper:all_metrics_are_global(Config) of
  438:                  true -> mim2();
  439:                  _ -> mim()
  440:              end,
  441: 
  442:     Result = request(<<"GET">>, ["/metrics/global/", Metric], Server),
  443:     assert_status(200, Result),
  444:     {_S, H, B} = Result,
  445:     #{<<"metric">> := #{MetricKey := Value}} = B,
  446:     ?assertEqual(<<"application/json">>, proplists:get_value(<<"content-type">>, H)),
  447:     ?assertEqual(1, maps:size(B)),
  448: 
  449:     Result2 = request(<<"GET">>, ["/metrics/global/"], Server),
  450:     assert_status(200, Result2),
  451:     {_S2, H2, B2} = Result2,
  452:     ?assertEqual(<<"application/json">>, proplists:get_value(<<"content-type">>, H2)),
  453:     #{<<"metrics">> := #{Metric := #{MetricKey := ValueList}}} = B2,
  454:     ?assertEqual(1, maps:size(B2)),
  455: 
  456:     [Value, ValueList].
  457: 
  458: assert_counter_inc(Name, Inc, Counters1, Counters2) when is_list(Counters1) ->
  459:     ExpectedCounters = [Counter+Inc || Counter <- Counters1],
  460:     case ExpectedCounters == Counters2 of
  461:         false ->
  462:             ct:comment("Expected ~w, got: ~w", [ExpectedCounters, Counters2]),
  463:             error({unexpected_values, Name, get_diffs(ExpectedCounters, Counters2)});
  464:         true -> ok
  465:     end;
  466: assert_counter_inc(_Name, Inc, Counter1, Counter2) when Counter1 + Inc =:= Counter2 ->
  467:     ok.
  468: 
  469: get_diffs(L1, L2) ->
  470:     lists:zip(L1, L2).
  471: 
  472: ensure_nodes_not_clustered(Config) ->
  473:     #{node := Node1Name} = RPCNode = mim(),
  474:     Nodes1 = rpc(RPCNode, mnesia, system_info, [running_db_nodes]),
  475: 
  476:     Nodes = [Node || Node <- Nodes1, Node =/= Node1Name],
  477:     [distributed_helper:remove_node_from_cluster(#{node => N}, Config) || N <- Nodes],
  478:     Config ++ [{nodes_clustered, Nodes}].
  479: 
  480: ensure_nodes_clustered(Config) ->
  481:     NodesToBeClustered = proplists:get_value(nodes_clustered, Config),
  482:     [distributed_helper:add_node_to_cluster(N, Config)
  483:      || N <- NodesToBeClustered],
  484:     Config.
  485: 
  486: request(Method, Path) ->
  487:     make_request(#{role => admin, method => Method, path => iolist_to_binary(Path),
  488:                    return_headers => true, return_maps => true}).
  489: 
  490: request(Method, Path, Server) ->
  491:     make_request(#{role => admin, method => Method, path => iolist_to_binary(Path),
  492:                    return_headers => true, return_maps => true, server => Server}).