Merge pull request #12663 from JimMoen/EMQX-11897/fix-cpu-usage/api · emqx/emqx@20cd47a · GitHub
[go: up one dir, main page]

Skip to content

Commit 20cd47a

Browse files
authored
Merge pull request #12663 from JimMoen/EMQX-11897/fix-cpu-usage/api
fix(vm): cpu usage/idle handled by single worker
2 parents f24a76e + 0edeff4 commit 20cd47a

File tree

9 files changed

+136
-43
lines changed

9 files changed

+136
-43
lines changed
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
%%--------------------------------------------------------------------
2+
%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved.
3+
%%
4+
%% Licensed under the Apache License, Version 2.0 (the "License");
5+
%% you may not use this file except in compliance with the License.
6+
%% You may obtain a copy of the License at
7+
%%
8+
%% http://www.apache.org/licenses/LICENSE-2.0
9+
%%
10+
%% Unless required by applicable law or agreed to in writing, software
11+
%% distributed under the License is distributed on an "AS IS" BASIS,
12+
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
%% See the License for the specific language governing permissions and
14+
%% limitations under the License.
15+
%%--------------------------------------------------------------------
16+
17+
-module(emqx_cpu_sup_worker).
18+
19+
-behaviour(gen_server).
20+
21+
-include("logger.hrl").
22+
23+
%% gen_server APIs
24+
-export([start_link/0]).
25+
26+
-export([
27+
cpu_util/0,
28+
cpu_util/1
29+
]).
30+
31+
%% gen_server callbacks
32+
-export([
33+
init/1,
34+
handle_continue/2,
35+
handle_call/3,
36+
handle_cast/2,
37+
terminate/2,
38+
code_change/3
39+
]).
40+
41+
-define(CPU_USAGE_WORKER, ?MODULE).
42+
43+
%%--------------------------------------------------------------------
44+
%% API
45+
%%--------------------------------------------------------------------
46+
47+
cpu_util() ->
48+
gen_server:call(?CPU_USAGE_WORKER, ?FUNCTION_NAME, infinity).
49+
50+
cpu_util(Args) ->
51+
gen_server:call(?CPU_USAGE_WORKER, {?FUNCTION_NAME, Args}, infinity).
52+
53+
%%--------------------------------------------------------------------
54+
%% gen_server callbacks
55+
%% simply handle cpu_sup:util/0,1 called in one process
56+
%%--------------------------------------------------------------------
57+
58+
start_link() ->
59+
gen_server:start_link({local, ?CPU_USAGE_WORKER}, ?MODULE, [], []).
60+
61+
init([]) ->
62+
{ok, undefined, {continue, setup}}.
63+
64+
handle_continue(setup, undefined) ->
65+
%% start os_mon temporarily
66+
{ok, _} = application:ensure_all_started(os_mon),
67+
%% The returned value of the first call to cpu_sup:util/0 or cpu_sup:util/1 by a
68+
%% process will on most systems be the CPU utilization since system boot,
69+
%% but this is not guaranteed and the value should therefore be regarded as garbage.
70+
%% This also applies to the first call after a restart of cpu_sup.
71+
_Val = cpu_sup:util(),
72+
{noreply, #{}}.
73+
74+
handle_call(cpu_util, _From, State) ->
75+
Val = cpu_sup:util(),
76+
{reply, Val, State};
77+
handle_call({cpu_util, Args}, _From, State) ->
78+
Val = erlang:apply(cpu_sup, util, Args),
79+
{reply, Val, State};
80+
handle_call(Req, _From, State) ->
81+
?SLOG(error, #{msg => "unexpected_call", call => Req}),
82+
{reply, ignored, State}.
83+
84+
handle_cast(Msg, State) ->
85+
?SLOG(error, #{msg => "unexpected_cast", cast => Msg}),
86+
{noreply, State}.
87+
88+
terminate(_Reason, _State) ->
89+
ok.
90+
91+
code_change(_OldVsn, State, _Extra) ->
92+
{ok, State}.

apps/emqx/src/emqx_os_mon.erl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
-behaviour(gen_server).
2020

21+
-include("emqx.hrl").
2122
-include("logger.hrl").
2223

2324
-export([start_link/0]).
@@ -47,8 +48,6 @@
4748
]).
4849
-export([is_os_check_supported/0]).
4950

50-
-include("emqx.hrl").
51-
5251
-define(OS_MON, ?MODULE).
5352

5453
start_link() ->
@@ -92,6 +91,8 @@ handle_continue(setup, undefined) ->
9291
SysHW = init_os_monitor(),
9392
MemRef = start_mem_check_timer(),
9493
CpuRef = start_cpu_check_timer(),
94+
%% the value of the first call should be regarded as garbage.
95+
_Val = cpu_sup:util(),
9596
{noreply, #{sysmem_high_watermark => SysHW, mem_time_ref => MemRef, cpu_time_ref => CpuRef}}.
9697

9798
init_os_monitor() ->
@@ -131,7 +132,7 @@ handle_info({timeout, _Timer, mem_check}, #{sysmem_high_watermark := HWM} = Stat
131132
handle_info({timeout, _Timer, cpu_check}, State) ->
132133
CPUHighWatermark = emqx:get_config([sysmon, os, cpu_high_watermark]) * 100,
133134
CPULowWatermark = emqx:get_config([sysmon, os, cpu_low_watermark]) * 100,
134-
CPUVal = emqx_vm:cpu_util(),
135+
CPUVal = cpu_sup:util(),
135136
case CPUVal of
136137
%% 0 or 0.0
137138
Busy when Busy == 0 ->

apps/emqx/src/emqx_sys_mon.erl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ remove_handler() ->
5858
post_config_update(_, _Req, NewConf, OldConf, _AppEnvs) ->
5959
#{os := OS1, vm := VM1} = OldConf,
6060
#{os := OS2, vm := VM2} = NewConf,
61-
VM1 =/= VM2 andalso ?MODULE:update(VM2),
62-
OS1 =/= OS2 andalso emqx_os_mon:update(OS2),
61+
(VM1 =/= VM2) andalso ?MODULE:update(VM2),
62+
(OS1 =/= OS2) andalso emqx_os_mon:update(OS2),
6363
ok.
6464

6565
update(VM) ->

apps/emqx/src/emqx_sys_sup.erl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ start_link() ->
2828
init([]) ->
2929
OsMon =
3030
case emqx_os_mon:is_os_check_supported() of
31-
true -> [child_spec(emqx_os_mon)];
31+
true -> [child_spec(emqx_os_mon), child_spec(emqx_cpu_sup_worker)];
3232
false -> []
3333
end,
3434
Children =

apps/emqx/src/emqx_vm.erl

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616

1717
-module(emqx_vm).
1818

19+
-include("logger.hrl").
20+
1921
-export([
2022
schedulers/0,
2123
scheduler_usage/1,
@@ -376,28 +378,29 @@ avg15() ->
376378
compat_windows(fun cpu_sup:avg15/0).
377379

378380
cpu_util() ->
379-
compat_windows(fun cpu_sup:util/0).
381+
compat_windows(fun() -> emqx_cpu_sup_worker:cpu_util() end).
380382

381383
cpu_util(Args) ->
382-
compat_windows(fun cpu_sup:util/1, Args).
383-
384+
compat_windows(fun() -> emqx_cpu_sup_worker:cpu_util(Args) end).
385+
386+
-spec compat_windows(function()) -> any().
387+
compat_windows(Fun) when is_function(Fun, 0) ->
388+
case emqx_os_mon:is_os_check_supported() of
389+
true ->
390+
try Fun() of
391+
Val when is_float(Val) -> floor(Val * 100) / 100;
392+
Val when is_number(Val) -> Val;
393+
Val when is_tuple(Val) -> Val;
394+
_ -> 0.0
395+
catch
396+
_:_ -> 0.0
397+
end;
398+
false ->
399+
0.0
400+
end;
384401
compat_windows(Fun) ->
385-
case compat_windows(Fun, []) of
386-
Val when is_float(Val) -> floor(Val * 100) / 100;
387-
Val when is_number(Val) -> Val;
388-
_ -> 0.0
389-
end.
390-
391-
compat_windows(Fun, Args) ->
392-
try
393-
case emqx_os_mon:is_os_check_supported() of
394-
false -> 0.0;
395-
true when Args =:= [] -> Fun();
396-
true -> Fun(Args)
397-
end
398-
catch
399-
_:_ -> 0.0
400-
end.
402+
?SLOG(warning, "Invalid function: ~p", [Fun]),
403+
error({badarg, Fun}).
401404

402405
load(Avg) ->
403406
floor((Avg / 256) * 100) / 100.

apps/emqx/test/emqx_os_mon_SUITE.erl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,8 @@ do_sys_mem_check_alarm(_Config) ->
132132
get_memory_usage,
133133
fun() -> Mem end,
134134
fun() ->
135-
timer:sleep(500),
135+
%% wait for `os_mon` to start
136+
timer:sleep(10_000),
136137
Alarms = emqx_alarm:get_alarms(activated),
137138
?assert(
138139
emqx_vm_mon_SUITE:is_existing(

apps/emqx/test/emqx_vm_SUITE.erl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121

2222
-include_lib("eunit/include/eunit.hrl").
2323

24-
all() -> emqx_common_test_helpers:all(?MODULE).
24+
all() ->
25+
emqx_common_test_helpers:all(?MODULE).
2526

2627
t_load(_Config) ->
2728
lists:foreach(
@@ -97,7 +98,7 @@ t_get_process_limit(_Config) ->
9798
emqx_vm:get_process_limit().
9899

99100
t_cpu_util(_Config) ->
100-
_Cpu = emqx_vm:cpu_util().
101+
?assertMatch(Val when is_number(Val), emqx_vm:cpu_util()).
101102

102103
easy_server() ->
103104
{ok, LSock} = gen_tcp:listen(5678, [binary, {packet, 0}, {active, false}]),

apps/emqx_management/src/emqx_mgmt.erl

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -205,23 +205,17 @@ cpu_stats() ->
205205
false ->
206206
[];
207207
true ->
208-
Idle = vm_stats('cpu.idle'),
209-
[
210-
{cpu_idle, Idle},
211-
{cpu_use, 100 - Idle}
212-
]
208+
vm_stats('cpu')
213209
end.
214210

215-
vm_stats('cpu.idle') ->
216-
case emqx_vm:cpu_util([detailed]) of
217-
{_Num, _Use, List, _} when is_list(List) -> proplists:get_value(idle, List, 0);
218-
%% return {all, 0, 0, []} when cpu_sup is not started
219-
_ -> 0
220-
end;
221-
vm_stats('cpu.use') ->
222-
case vm_stats('cpu.idle') of
223-
0 -> 0;
224-
Idle -> 100 - Idle
211+
vm_stats('cpu') ->
212+
CpuUtilArg = [],
213+
case emqx_vm:cpu_util([CpuUtilArg]) of
214+
%% return 0.0 when `emqx_cpu_sup_worker` is not started
215+
{all, Use, Idle, _} ->
216+
[{cpu_use, Use}, {cpu_idle, Idle}];
217+
_ ->
218+
[{cpu_use, 0}, {cpu_idle, 0}]
225219
end;
226220
vm_stats('total.memory') ->
227221
{_, MemTotal} = get_sys_memory(),

changes/ce/fix-12663.en.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed an issue where the `emqx_vm_cpu_use` and `emqx_vm_cpu_idle` metrics in the Prometheus endpoint `/prometheus/stats` always reported the average CPU usage since operating system boot.

0 commit comments

Comments
 (0)
0