From 0630af6cb46ce4b041e95a0293f941c37b1b612d Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 30 Jun 2025 15:20:54 +0200 Subject: [PATCH 01/31] feat(parser): tell how much more bytes are expected when incomplete This is useful for `socket`-based connections to request subsequent asynchronous socket reads. --- apps/emqx/src/emqx_connection.erl | 2 +- apps/emqx/src/emqx_frame.erl | 10 ++++++---- apps/emqx/src/emqx_quic_data_stream.erl | 2 +- apps/emqx/src/emqx_ws_connection.erl | 2 +- apps/emqx/test/emqx_frame_SUITE.erl | 10 +++++----- apps/emqx/test/emqx_mqtt_test_client.erl | 2 +- 6 files changed, 15 insertions(+), 13 deletions(-) diff --git a/apps/emqx/src/emqx_connection.erl b/apps/emqx/src/emqx_connection.erl index 17ce97eb225..f58cde38f4e 100644 --- a/apps/emqx/src/emqx_connection.erl +++ b/apps/emqx/src/emqx_connection.erl @@ -795,7 +795,7 @@ run_stream_parser(Data, Acc, N, ParseState, State) -> case emqx_frame:parse(Data, ParseState) of {Packet, Rest, NParseState} -> run_stream_parser(Rest, [Packet | Acc], N + 1, NParseState, State); - {more, NParseState} -> + {_More, NParseState} -> {N, Acc, State#state{parser = NParseState}} end. diff --git a/apps/emqx/src/emqx_frame.erl b/apps/emqx/src/emqx_frame.erl index cd04bd6bc2f..5694d51e70f 100644 --- a/apps/emqx/src/emqx_frame.erl +++ b/apps/emqx/src/emqx_frame.erl @@ -56,7 +56,9 @@ -type parse_state_initial() :: #options{}. -type parse_result() :: - {more, parse_state()} + %% Need more bytes out of stream, `0` means it's unclear how much more. + {_NeedMore :: non_neg_integer(), parse_state()} + %% There's a full packet. | {emqx_types:packet(), binary(), parse_state_initial()}. -type serialize_opts() :: options(). @@ -144,7 +146,7 @@ parse( ) -> parse_body_frame(Bin, Header, Need, Body, Options); parse(<<>>, State) -> - {more, State}. + {0, State}. %% @doc Parses _complete_ binary frame into a single `#mqtt_packet{}`. 
-spec parse_complete(iodata(), parse_state_initial()) -> @@ -170,7 +172,7 @@ parse_complete( end. parse_remaining_len(<<>>, Header, Mult, Length, Options) -> - {more, #remlen{hdr = Header, len = Length, mult = Mult, opts = Options}}; + {_NeedMore = 0, #remlen{hdr = Header, len = Length, mult = Mult, opts = Options}}; parse_remaining_len(<<0:8, Rest/binary>>, Header, 1, 0, Options) -> Packet = parse_bodyless_packet(Header), {Packet, Rest, Options}; @@ -226,7 +228,7 @@ parse_body_frame(Bin, Header, Need, Body, Options) -> case Need - byte_size(Bin) of More when More > 0 -> NewBody = append_body(Body, Bin), - {more, #body{hdr = Header, need = More, acc = NewBody, opts = Options}}; + {More, #body{hdr = Header, need = More, acc = NewBody, opts = Options}}; _ -> <> = Bin, Frame = iolist_to_binary(append_body(Body, LastPart)), diff --git a/apps/emqx/src/emqx_quic_data_stream.erl b/apps/emqx/src/emqx_quic_data_stream.erl index daf84eeec97..2eb1d557afe 100644 --- a/apps/emqx/src/emqx_quic_data_stream.erl +++ b/apps/emqx/src/emqx_quic_data_stream.erl @@ -398,7 +398,7 @@ do_parse_incoming(<<>>, Packets, ParseState) -> {Packets, ParseState}; do_parse_incoming(Data, Packets, ParseState) -> case emqx_frame:parse(Data, ParseState) of - {more, NParseState} -> + {_More, NParseState} -> {Packets, NParseState}; {Packet, Rest, NParseState} -> do_parse_incoming(Rest, [Packet | Packets], NParseState) diff --git a/apps/emqx/src/emqx_ws_connection.erl b/apps/emqx/src/emqx_ws_connection.erl index 2b464adb377..ff024599c32 100644 --- a/apps/emqx/src/emqx_ws_connection.erl +++ b/apps/emqx/src/emqx_ws_connection.erl @@ -590,7 +590,7 @@ parse_incoming(<<>>, Packets, State) -> {lists:reverse(Packets), State}; parse_incoming(Data, Packets, State = #state{parse_state = ParseState}) -> try emqx_frame:parse(Data, ParseState) of - {more, NParseState} -> + {_More, NParseState} -> {Packets, State#state{parse_state = NParseState}}; {Packet, Rest, NParseState} -> NState = State#state{parse_state = 
NParseState}, diff --git a/apps/emqx/test/emqx_frame_SUITE.erl b/apps/emqx/test/emqx_frame_SUITE.erl index 383fc168720..1344968c5f0 100644 --- a/apps/emqx/test/emqx_frame_SUITE.erl +++ b/apps/emqx/test/emqx_frame_SUITE.erl @@ -125,10 +125,10 @@ t_parse_cont(_) -> Packet = ?CONNECT_PACKET(#mqtt_packet_connect{}), ParseState = emqx_frame:initial_parse_state(), <> = serialize_to_binary(Packet), - {more, ContParse} = emqx_frame:parse(<<>>, ParseState), - {more, ContParse1} = emqx_frame:parse(HdrBin, ContParse), - {more, ContParse2} = emqx_frame:parse(LenBin, ContParse1), - {more, ContParse3} = emqx_frame:parse(<<>>, ContParse2), + {0, ContParse} = emqx_frame:parse(<<>>, ParseState), + {0, ContParse1} = emqx_frame:parse(HdrBin, ContParse), + {12, ContParse2} = emqx_frame:parse(LenBin, ContParse1), + {12, ContParse3} = emqx_frame:parse(<<>>, ContParse2), {Packet, <<>>, _} = emqx_frame:parse(RestBin, ContParse3). t_parse_frame_too_large(_) -> @@ -411,7 +411,7 @@ t_parse_sticky_frames(_) -> Size = size(Bin), <> = Bin, %% needs 2 more bytes - {more, PState1} = emqx_frame:parse(H), + {2, PState1} = emqx_frame:parse(H), %% feed 3 bytes as if the next 1 byte belongs to the next packet. {_, <<42>>, PState2} = emqx_frame:parse(iolist_to_binary([TailTwoBytes, 42]), PState1), ?assertMatch(#{state := clean}, emqx_frame:describe_state(PState2)). 
diff --git a/apps/emqx/test/emqx_mqtt_test_client.erl b/apps/emqx/test/emqx_mqtt_test_client.erl index 2ea774681ee..fdbb338eb3e 100644 --- a/apps/emqx/test/emqx_mqtt_test_client.erl +++ b/apps/emqx/test/emqx_mqtt_test_client.erl @@ -151,7 +151,7 @@ terminate(_Reason, _St) -> process_incoming(PSt, Data, Packets) -> case emqx_frame:parse(Data, PSt) of - {more, NewPSt} -> + {_More, NewPSt} -> {NewPSt, lists:reverse(Packets)}; {Packet, Rest, NewPSt} -> process_incoming(NewPSt, Rest, [Packet | Packets]) From 7cf8259fd0cfb746c8371dd1b9c2ed43a1114679 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 23 Jul 2025 16:19:31 +0200 Subject: [PATCH 02/31] chore: update `esockd` to 5.15.0 with `esockd_socket` backend --- mix.exs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mix.exs b/mix.exs index fcb09cadb69..06171ff1d47 100644 --- a/mix.exs +++ b/mix.exs @@ -153,7 +153,10 @@ defmodule EMQXUmbrella.MixProject do end def common_dep(:ekka), do: {:ekka, github: "emqx/ekka", tag: "0.23.0", override: true} - def common_dep(:esockd), do: {:esockd, github: "emqx/esockd", tag: "5.14.0", override: true} + + def common_dep(:esockd), + do: {:esockd, github: "emqx/esockd", tag: "5.15.0", override: true} + def common_dep(:gproc), do: {:gproc, github: "emqx/gproc", tag: "0.9.0.1", override: true} def common_dep(:hocon), do: {:hocon, github: "emqx/hocon", tag: "0.45.4", override: true} def common_dep(:lc), do: {:lc, github: "emqx/lc", tag: "0.3.4", override: true} From be4b994d21e86a3b5db6ecd30e61eaf3e78a5feb Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 30 Jun 2025 18:12:43 +0200 Subject: [PATCH 03/31] feat(socket): implement `esockd_socket` connection module --- apps/emqx/src/emqx_congestion.erl | 3 + apps/emqx/src/emqx_socket_connection.erl | 1137 ++++++++++++++++++++++ 2 files changed, 1140 insertions(+) create mode 100644 apps/emqx/src/emqx_socket_connection.erl diff --git a/apps/emqx/src/emqx_congestion.erl b/apps/emqx/src/emqx_congestion.erl 
index 03ecce3bad1..0a5176bb1bf 100644 --- a/apps/emqx/src/emqx_congestion.erl +++ b/apps/emqx/src/emqx_congestion.erl @@ -105,6 +105,9 @@ do_cancel_alarm_congestion(Socket, Transport, Channel, Reason) -> emqx_alarm:ensure_deactivated(?ALARM_CONN_CONGEST(Channel, Reason), AlarmDetails, Message), ok. +is_tcp_congested(_Socket, esockd_socket) -> + %% TODO: No such concept in `socket`-based sockets. + false; is_tcp_congested(Socket, Transport) -> case Transport:getstat(Socket, [send_pend]) of {ok, [{send_pend, N}]} when N > 0 -> true; diff --git a/apps/emqx/src/emqx_socket_connection.erl b/apps/emqx/src/emqx_socket_connection.erl new file mode 100644 index 00000000000..f9b370fd2fc --- /dev/null +++ b/apps/emqx/src/emqx_socket_connection.erl @@ -0,0 +1,1137 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2018-2025 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- + +%% This module interacts with the transport layer of MQTT +%% Transport: esockd_socket. +-module(emqx_socket_connection). + +-include("emqx.hrl"). +-include("emqx_mqtt.hrl"). +-include("logger.hrl"). +-include("types.hrl"). +-include("emqx_external_trace.hrl"). +-include("emqx_instr.hrl"). +-include_lib("snabbkaffe/include/snabbkaffe.hrl"). + +-ifdef(TEST). +-compile(export_all). +-compile(nowarn_export_all). +-endif. + +-elvis([{elvis_style, used_ignored_variable, disable}]). +-elvis([{elvis_style, invalid_dynamic_call, #{ignore => [emqx_connection]}}]). + +%% API +-export([ + start_link/3, + stop/1 +]). + +-export([ + info/1, + info/2, + stats/1 +]). + +-export([ + async_set_keepalive/4, + async_set_socket_options/2 +]). + +-export([ + call/2, + call/3, + cast/2 +]). + +%% Callback +-export([init/4]). + +%% Sys callbacks +-export([ + system_continue/3, + system_terminate/4, + system_code_change/4, + system_get_state/1 +]). 
+
+%% Internal callback
+-export([wakeup_from_hib/2, recvloop/2, get_state/1]).
+
+%% Export for CT
+-export([set_field/3]).
+
+-export_type([
+    state/0,
+    parser/0
+]).
+
+%% Per-connection process state threaded through the receive loop.
+-record(state, {
+    %% TCP/TLS Socket
+    socket :: socket:socket(),
+    %% Sock State
+    sockstate :: emqx_types:sockstate(),
+    %% Packet parser / serializer
+    parser :: parser(),
+    serialize :: emqx_frame:serialize_opts(),
+    %% Channel State
+    channel :: emqx_channel:channel(),
+    %% GC State
+    gc_state :: option(emqx_gc:gc_state()),
+    %% Stats Timer
+    %% When `disabled` stats are never reported.
+    %% Until complete CONNECT packet received acts as idle timer, which shuts
+    %% the connection down once triggered.
+    stats_timer :: disabled | option(reference()) | {idle, reference()},
+    %% ActiveN + GC tracker
+    gc_tracker :: gc_tracker(),
+    %% Hibernate connection process if inactive for this long
+    %% (used as the `receive ... after` timeout of the receive loop).
+    hibernate_after :: integer() | infinity,
+    %% Zone name
+    zone :: atom(),
+    %% Listener Type and Name
+    listener :: {Type :: atom(), Name :: atom()},
+
+    %% Extra field for future hot-upgrade support
+    extra = []
+}).
+
+%% ActiveN threshold plus inbound / outbound packet and byte counters.
+-type gc_tracker() ::
+    {
+        ActiveN :: non_neg_integer(),
+        {PktsIn :: non_neg_integer(), BytesIn :: non_neg_integer()},
+        {PktsOut :: non_neg_integer(), BytesOut :: non_neg_integer()}
+    }.
+
+-type parser() ::
+    %% Bytestream parser.
+    _Stream :: emqx_frame:parse_state().
+
+-opaque state() :: #state{}.
+
+%% Keys reported under `sockinfo` by `info/1`.
+-define(INFO_KEYS, [
+    socktype,
+    peername,
+    sockname,
+    sockstate
+]).
+
+%% Socket statistics reported by `stats/1`.
+-define(SOCK_STATS, [
+    recv_oct,
+    recv_cnt,
+    send_oct,
+    send_cnt,
+    send_pend
+]).
+
+-define(ENABLED(X), (X =/= undefined)).
+
+-define(LOG(Level, Data), ?SLOG(Level, (Data)#{tag => "MQTT"})).
+
+%% @doc Spawn a linked connection process that starts in `init/4`.
+%% The calling process is handed over as the parent.
+-spec start_link(esockd_socket, socket:socket(), emqx_channel:opts()) -> {ok, pid()}.
+start_link(Transport, Socket, Options) ->
+    Parent = self(),
+    {ok, proc_lib:spawn_link(?MODULE, init, [Parent, Transport, Socket, Options])}.
+
+%%--------------------------------------------------------------------
+%% API
+%%--------------------------------------------------------------------
+
+%% @doc Get infos of the connection/channel.
+%% Given a pid the request is forwarded to that process; given a state the
+%% channel infos are returned with the socket infos added under `sockinfo`.
+-spec info(pid() | state()) -> emqx_types:infos().
+info(CPid) when is_pid(CPid) ->
+    call(CPid, info);
+info(State = #state{channel = Channel}) ->
+    Infos = emqx_channel:info(Channel),
+    maps:put(sockinfo, maps:from_list(info(?INFO_KEYS, State)), Infos).
+
+%% Look up a single info item, or a list of them as `{Key, Value}` pairs.
+-spec info([atom()] | atom() | tuple(), pid() | state()) -> term().
+info(Keys, State) when is_list(Keys) ->
+    lists:map(fun(Key) -> {Key, info(Key, State)} end, Keys);
+info(socktype, #state{}) ->
+    tcp;
+info(peername, #state{channel = Channel}) ->
+    emqx_channel:info(peername, Channel);
+info(sockname, #state{channel = Channel}) ->
+    emqx_channel:info(sockname, Channel);
+info(sockstate, #state{sockstate = SockSt}) ->
+    SockSt;
+info(stats_timer, #state{stats_timer = StatsTimer}) ->
+    StatsTimer;
+info({channel, Info}, #state{channel = Channel}) ->
+    emqx_channel:info(Info, Channel).
+
+%% @doc Get stats of the connection/channel.
+%% Socket stats are taken from the `counters` of `socket:info/1`;
+%% `send_pend` is always reported as 0.
+-spec stats(pid() | state()) -> emqx_types:stats().
+stats(CPid) when is_pid(CPid) ->
+    call(CPid, stats);
+stats(#state{socket = Socket, channel = Channel}) ->
+    #{counters := Counters} = socket:info(Socket),
+    SockStats = [
+        case Stat of
+            recv_oct -> {Stat, maps:get(read_byte, Counters, 0)};
+            recv_cnt -> {Stat, maps:get(read_pkg, Counters, 0)};
+            send_oct -> {Stat, maps:get(write_byte, Counters, 0)};
+            send_cnt -> {Stat, maps:get(write_pkg, Counters, 0)};
+            send_pend -> {Stat, 0}
+        end
+     || Stat <- ?SOCK_STATS
+    ],
+    ChanStats = emqx_channel:stats(Channel),
+    ProcStats = emqx_utils:proc_stats(),
+    lists:append([SockStats, ChanStats, ProcStats]).
+
+%% @doc Set TCP keepalive socket options to override system defaults.
+%% Idle: The number of seconds a connection needs to be idle before
+%% TCP begins sending out keep-alive probes (Linux default 7200).
+%% Interval: The number of seconds between TCP keep-alive probes
+%% (Linux default 75).
+%% Probes: The maximum number of TCP keep-alive probes to send before
+%% giving up and killing the connection if no response is
+%% obtained from the other end (Linux default 9).
+%%
+%% NOTE: This API sets TCP socket options, which has nothing to do with
+%% the MQTT layer's keepalive (PINGREQ and PINGRESP).
+async_set_keepalive(Pid, Idle, Interval, Probes) ->
+    KeepaliveOpts = [
+        {{socket, keepalive}, true},
+        {{tcp, keepcnt}, Probes},
+        {{tcp, keepidle}, Idle},
+        {{tcp, keepintvl}, Interval}
+    ],
+    async_set_socket_options(Pid, KeepaliveOpts).
+
+%% @doc Set custom socket options.
+%% This API is made async because the call might be originated from
+%% a hookpoint callback (otherwise deadlock).
+%% If failed to set, the error message is logged.
+async_set_socket_options(Pid, Options) ->
+    cast(Pid, {async_set_socket_options, Options}).
+
+%% Thin wrappers: the connection process speaks the gen_server
+%% call / cast message protocol.
+cast(Pid, Req) ->
+    gen_server:cast(Pid, Req).
+
+call(Pid, Req) ->
+    gen_server:call(Pid, Req, infinity).
+call(Pid, Req, Timeout) ->
+    gen_server:call(Pid, Req, Timeout).
+
+stop(Pid) ->
+    gen_server:stop(Pid).
+
+%%--------------------------------------------------------------------
+%% callbacks
+%%--------------------------------------------------------------------
+
+%% Entry point of the spawned process: wait for the socket to become
+%% usable, then enter the main loop; close and exit if that fails.
+init(Parent, esockd_socket, RawSocket, Options) ->
+    case esockd_socket:wait(RawSocket) of
+        {error, Reason} ->
+            ok = esockd_socket:fast_close(RawSocket),
+            exit_on_sock_error(Reason);
+        {ok, Socket} ->
+            run_loop(Parent, init_state(Socket, Options))
+    end.
+
+%% Build the initial connection state: collect socket facts into the
+%% channel's conninfo, set up parser / serializer options from the zone
+%% configuration, and initialise the channel and GC bookkeeping.
+init_state(
+    Socket,
+    #{zone := Zone, listener := {Type, Listener}} = Opts
+) ->
+    %% Any socket error below closes the socket and exits the process.
+    SockType = ensure_ok_or_exit(esockd_socket:type(Socket), Socket),
+    {ok, Peername} = ensure_ok_or_exit(esockd_socket:peername(Socket), Socket),
+    {ok, Sockname} = ensure_ok_or_exit(esockd_socket:sockname(Socket), Socket),
+    Peercert = ensure_ok_or_exit(esockd_socket:peercert(Socket), Socket),
+    PeerSNI = ensure_ok_or_exit(esockd_socket:peersni(Socket), Socket),
+    ConnInfo = #{
+        socktype => SockType,
+        peername => Peername,
+        sockname => Sockname,
+        peercert => Peercert,
+        peersni => PeerSNI,
+        conn_mod => ?MODULE,
+        sock => Socket
+    },
+    ActiveN = get_active_n(Type, Listener),
+    StrictMode = emqx_config:get_zone_conf(Zone, [mqtt, strict_mode]),
+    MaxSize = emqx_config:get_zone_conf(Zone, [mqtt, max_packet_size]),
+    FrameOpts = #{strict_mode => StrictMode, max_size => MaxSize},
+    Parser = init_parser(FrameOpts),
+    Serialize = emqx_frame:initial_serialize_opts(FrameOpts),
+    %% Init Channel
+    Channel = emqx_channel:init(ConnInfo, Opts),
+    %% Forced GC is optional per zone.
+    GcState =
+        case emqx_config:get_zone_conf(Zone, [force_gc]) of
+            #{enable := false} -> undefined;
+            GcPolicy -> emqx_gc:init(GcPolicy)
+        end,
+    GcTracker = init_gc_tracker(ActiveN),
+    HibernateAfter = maps:get(hibernate_after, Opts, get_zone_idle_timeout(Zone)),
+    #state{
+        socket = Socket,
+        sockstate = idle,
+        parser = Parser,
+        serialize = Serialize,
+        channel = Channel,
+        gc_state = GcState,
+        gc_tracker = GcTracker,
+        hibernate_after = HibernateAfter,
+        zone = Zone,
+        listener = {Type, Listener},
+        extra = []
+    }.
+
+%% Pass a non-error result through unchanged. On `{error, Reason}` close the
+%% socket and exit: `normal` for `enotconn` / `closed`, `{shutdown, Reason}`
+%% for anything else.
+ensure_ok_or_exit({error, Reason}, Sock) ->
+    esockd_socket:fast_close(Sock),
+    case Reason =:= enotconn orelse Reason =:= closed of
+        true -> exit(normal);
+        false -> exit({shutdown, Reason})
+    end;
+ensure_ok_or_exit(Ok, _Sock) ->
+    Ok.
+
+%% Fresh tracker: the ActiveN threshold with zeroed in / out counters.
+init_gc_tracker(ActiveN) ->
+    {ActiveN, {0, 0}, {0, 0}}.
+
+%% Prepare the process (logger metadata, heap tuning, TCP keepalive), issue
+%% the first asynchronous read and enter the receive loop.
+run_loop(
+    Parent,
+    State = #state{
+        socket = Socket,
+        channel = Channel,
+        listener = Listener,
+        zone = Zone
+    }
+) ->
+    Peername = emqx_channel:info(peername, Channel),
+    emqx_logger:set_metadata_peername(esockd:format(Peername)),
+    ShutdownPolicy = emqx_config:get_zone_conf(Zone, [force_shutdown]),
+    _ = emqx_utils:tune_heap_size(ShutdownPolicy),
+    ok = set_tcp_keepalive(Listener),
+    case sock_async_recv(Socket, 0) of
+        {ok, Data} ->
+            %% Data is already available: process it and keep reading.
+            handle_recv({recv_more, Data}, Parent, start_idle_timer(State));
+        {select, _SelectInfo} ->
+            %% Nothing to read yet: sleep until the socket notifies us.
+            hibernate(Parent, start_idle_timer(State));
+        {error, Error} ->
+            %% The error may carry a second element alongside the reason.
+            Reason =
+                case Error of
+                    {Reason0, _} -> Reason0;
+                    Reason0 -> Reason0
+                end,
+            case Reason == closed of
+                true -> ok;
+                false -> esockd_socket:fast_close(Socket)
+            end,
+            exit_on_sock_error(Reason)
+    end.
+
+%% Exit `normal` for socket errors that just mean the peer is gone,
+%% `{shutdown, Reason}` otherwise.
+-spec exit_on_sock_error(any()) -> no_return().
+exit_on_sock_error(Reason) when
+    Reason =:= einval orelse Reason =:= enotconn orelse Reason =:= closed
+->
+    erlang:exit(normal);
+exit_on_sock_error(Reason) ->
+    erlang:exit({shutdown, Reason}).
+
+%%--------------------------------------------------------------------
+%% Recv Loop
+
+%% Wait for the next message. After `hibernate_after` of silence either keep
+%% waiting (when `emqx_olp:backoff_hibernation/1` says so) or report stats,
+%% cancel the stats timer and hibernate.
+recvloop(
+    Parent,
+    State = #state{
+        hibernate_after = HibernateTimeout,
+        zone = Zone
+    }
+) ->
+    receive
+        Msg ->
+            handle_recv(Msg, Parent, State)
+    after HibernateTimeout ->
+        Backoff = emqx_olp:backoff_hibernation(Zone),
+        case Backoff of
+            false ->
+                _ = try_set_chan_stats(State),
+                hibernate(Parent, cancel_stats_timer(State));
+            true ->
+                recvloop(Parent, State)
+        end
+    end.
+
+%% Dispatch one received message: system messages go to `sys`, an exit of
+%% the parent terminates the connection, everything else is processed and
+%% the loop continues unless processing asks to stop.
+handle_recv({system, From, Request}, Parent, State) ->
+    sys:handle_system_msg(Request, From, Parent, ?MODULE, [], State);
+handle_recv({'EXIT', Parent, Reason}, Parent, State) ->
+    %% FIXME: it's not trapping exit, should never receive an EXIT
+    terminate(Reason, State);
+handle_recv(Msg, Parent, State) ->
+    Result = process_msg(Msg, ensure_stats_timer(State)),
+    case Result of
+        {stop, Reason, StopState} ->
+            terminate(Reason, StopState);
+        {ok, NextState} ->
+            ?MODULE:recvloop(Parent, NextState)
+    end.
+
+%% Hibernate the process; it resumes in `wakeup_from_hib/2`.
+hibernate(Parent, State) ->
+    proc_lib:hibernate(?MODULE, wakeup_from_hib, [Parent, State]).
+
+%% Maybe do something here later.
+wakeup_from_hib(Parent, State) ->
+    ?MODULE:recvloop(Parent, State).
+
+%%--------------------------------------------------------------------
+
+-compile({inline, [sock_async_recv/2]}).
+
+%% Non-blocking read of `Len` bytes (`nowait`).
+sock_async_recv(Socket, Len) ->
+    socket:recv(Socket, Len, [], nowait).
+
+%% Apply socket options one by one, stopping at the first failure and
+%% returning it.
+sock_setopts(_Sock, []) ->
+    ok;
+sock_setopts(Socket, [{Opt, Value} | Rest]) ->
+    case socket:setopt(Socket, Opt, Value) of
+        ok -> sock_setopts(Socket, Rest);
+        Error -> Error
+    end.
+
+%%--------------------------------------------------------------------
+%% Ensure/cancel stats timer
+
+%% Initial `stats_timer` value: `undefined` (not started yet) when stats
+%% are enabled for the zone, `disabled` otherwise.
+init_stats_timer(#state{zone = Zone}) ->
+    case emqx_config:get_zone_conf(Zone, [stats, enable]) of
+        true -> undefined;
+        false -> disabled
+    end.
+
+-compile({inline, [ensure_stats_timer/1]}).
+ensure_stats_timer(State = #state{stats_timer = undefined, zone = Zone}) ->
+    TRef = start_timer(get_zone_idle_timeout(Zone), emit_stats),
+    State#state{stats_timer = TRef};
+ensure_stats_timer(State) ->
+    %% Either already active, disabled, or paused.
+    State.
+
+-compile({inline, [cancel_stats_timer/1]}).
+cancel_stats_timer(State = #state{stats_timer = TRef}) when is_reference(TRef) ->
+    ?tp(debug, cancel_stats_timer, #{}),
+    ok = emqx_utils:cancel_timer(TRef),
+    State#state{stats_timer = undefined};
+cancel_stats_timer(State) ->
+    %% No running timer reference to cancel.
+    State.
+
+%% Start the idle timer; it is stored in the `stats_timer` slot, tagged
+%% with `idle`.
+start_idle_timer(State = #state{zone = Zone}) ->
+    TRef = start_timer(get_zone_idle_timeout(Zone), idle_timeout),
+    State#state{stats_timer = {idle, TRef}}.
+
+%% Cancel the idle timer if it is the one currently stored.
+cancel_idle_timer(#state{stats_timer = {idle, TRef}}) ->
+    emqx_utils:cancel_timer(TRef);
+cancel_idle_timer(_State) ->
+    ok.
+
+-compile({inline, [get_zone_idle_timeout/1]}).
+get_zone_idle_timeout(Zone) ->
+    emqx_channel:get_mqtt_conf(Zone, idle_timeout).
+
+%%--------------------------------------------------------------------
+%% Process next Msg
+
+%% Process a (possibly nested) list of messages in order, stopping at the
+%% first result that is not `{ok, State}`.
+process_msgs([], State) ->
+    {ok, State};
+process_msgs([Head | More], State) ->
+    Result =
+        case is_list(Head) of
+            true -> process_msgs(Head, State);
+            false -> process_msg(Head, State)
+        end,
+    case Result of
+        {ok, NState} ->
+            process_msgs(More, NState);
+        Stop ->
+            Stop
+    end.
+
+%% Handle one message and then any follow-up messages it produced.
+%% Exits are turned into `{stop, Reason, State}`; other exceptions are
+%% wrapped in a map carrying class, reason and stacktrace.
+process_msg(Msg, State) ->
+    try handle_msg(Msg, State) of
+        ok ->
+            {ok, State};
+        {ok, NState} ->
+            {ok, NState};
+        {ok, NextMsgs, NState} when is_list(NextMsgs) ->
+            process_msgs(NextMsgs, NState);
+        {ok, NextMsg, NState} ->
+            process_msg(NextMsg, NState);
+        {stop, Reason, NState} ->
+            {stop, Reason, NState};
+        {stop, Reason} ->
+            {stop, Reason, State}
+    catch
+        exit:normal ->
+            {stop, normal, State};
+        exit:shutdown ->
+            {stop, shutdown, State};
+        exit:{shutdown, _} = Shutdown ->
+            {stop, Shutdown, State};
+        Class:Error:Stacktrace ->
+            Failure = #{
+                exception => Class,
+                context => Error,
+                stacktrace => Stacktrace
+            },
+            {stop, Failure, State}
+    end.
+
+%%--------------------------------------------------------------------
+%% Handle a Msg
+%%
+%% Returns `ok`, `{ok, State}`, `{ok, NextMsgOrMsgs, State}`, `{stop, Reason}`
+%% or `{stop, Reason, State}`; see `process_msg/2` for how these are consumed.
+handle_msg({'$gen_call', From, Req}, State) ->
+    case handle_call(From, Req, State) of
+        {reply, Reply, NState} ->
+            gen_server:reply(From, Reply),
+            {ok, NState};
+        {stop, Reason, Reply, NState} ->
+            gen_server:reply(From, Reply),
+            stop(Reason, NState)
+    end;
+handle_msg({'$gen_cast', Req}, State) ->
+    NewState = handle_cast(Req, State),
+    {ok, NewState};
+handle_msg({'$socket', Socket, select, _Handle}, State) ->
+    %% The socket signalled readiness: read whatever is available.
+    case sock_async_recv(Socket, 0) of
+        {ok, Data} ->
+            handle_data(Data, true, State);
+        {select, {_SelectInfo, Data}} ->
+            %% Partial data and a new select is already pending: process what
+            %% we have without issuing another read.
+            handle_data(Data, false, State);
+        {select, _SelectInfo} ->
+            %% Nothing to read after all and a new select is pending: just
+            %% wait for the next notification instead of crashing with
+            %% `case_clause`.
+            {ok, State};
+        {error, {closed, Data}} ->
+            {ok, [{recv, Data}, {sock_closed, tcp_closed}], socket_closed(State)};
+        {error, closed} ->
+            handle_info({sock_closed, tcp_closed}, socket_closed(State));
+        {error, {Reason, Data}} ->
+            {ok, [{recv, Data}, {sock_error, Reason}], State};
+        {error, Reason} ->
+            handle_info({sock_error, Reason}, State)
+    end;
+handle_msg({'$socket', _Socket, abort, {_Handle, Reason}}, State) ->
+    handle_info({sock_error, Reason}, State);
+handle_msg({recv, Data}, State) ->
+    %% Data only, no follow-up read.
+    handle_data(Data, false, State);
+handle_msg({recv_more, Data}, State) ->
+    %% Data, then ask the socket for more.
+    handle_data(Data, true, State);
+handle_msg({incoming, Packet}, State) ->
+    ?TRACE("MQTT", "mqtt_packet_received", #{packet => Packet}),
+    handle_incoming(Packet, State);
+handle_msg({outgoing, Packets}, State) ->
+    handle_outgoing(Packets, State);
+handle_msg(
+    Deliver = {deliver, _Topic, _Msg},
+    #state{gc_tracker = {ActiveN, _, _}} = State
+) ->
+    ?BROKER_INSTR_SETMARK(t0_deliver, {_Msg#message.extra, ?BROKER_INSTR_TS()}),
+    %% Batch this delivery with the ones already queued in the mailbox.
+    Delivers = [Deliver | emqx_utils:drain_deliver(ActiveN)],
+    with_channel(handle_deliver, [Delivers], State);
+handle_msg({connack, ConnAck}, State) ->
+    handle_outgoing(ConnAck, State);
+handle_msg({close, Reason}, State) ->
+    %% @FIXME here it could be close due to appl error.
+    ?TRACE("SOCKET", "socket_force_closed", #{reason => Reason}),
+    handle_info({sock_closed, Reason}, close_socket(State));
+handle_msg({event, connected}, State = #state{channel = Channel}) ->
+    ClientId = emqx_channel:info(clientid, Channel),
+    emqx_cm:insert_channel_info(ClientId, info(State), stats(State)),
+    {ok, ensure_stats_timer(State)};
+handle_msg({event, disconnected}, State = #state{channel = Channel}) ->
+    ClientId = emqx_channel:info(clientid, Channel),
+    emqx_cm:set_chan_info(ClientId, info(State)),
+    {ok, State};
+handle_msg({event, _Other}, State = #state{channel = Channel}) ->
+    case emqx_channel:info(clientid, Channel) of
+        %% ClientId is yet unknown (i.e. connect packet is not received yet)
+        undefined ->
+            ok;
+        ClientId ->
+            emqx_cm:set_chan_info(ClientId, info(State)),
+            emqx_cm:set_chan_stats(ClientId, stats(State))
+    end,
+    {ok, State};
+handle_msg({timeout, TRef, TMsg}, State) ->
+    handle_timeout(TRef, TMsg, State);
+handle_msg(Shutdown = {shutdown, _Reason}, State) ->
+    stop(Shutdown, State);
+handle_msg(Msg, State) ->
+    handle_info(Msg, State).
+
+%%--------------------------------------------------------------------
+%% Terminate
+
+%% Mark the channel disconnected, cancel congestion alarms, terminate the
+%% channel and close the socket. Failures during cleanup are only traced;
+%% the original reason is then passed to `maybe_raise_exception/1`.
+-spec terminate(any(), state()) -> no_return().
+terminate(
+    Reason,
+    State = #state{
+        channel = Channel,
+        socket = Socket
+    }
+) ->
+    try
+        Channel1 = emqx_channel:set_conn_state(disconnected, Channel),
+        emqx_congestion:cancel_alarms(Socket, esockd_socket, Channel1),
+        emqx_channel:terminate(Reason, Channel1),
+        close_socket_ok(State),
+        ?TRACE("SOCKET", "emqx_connection_terminated", #{reason => Reason})
+    catch
+        E:C:S ->
+            ?tp(warning, unclean_terminate, #{exception => E, context => C, stacktrace => S})
+    end,
+    ?tp(info, terminate, #{reason => Reason}),
+    maybe_raise_exception(Reason).
+
+%% close socket, discard new state, always return ok.
+close_socket_ok(State) ->
+    _ = close_socket(State),
+    ok.
+
+%% tell truth about the original exception
+-spec maybe_raise_exception(any()) -> no_return().
+maybe_raise_exception(#{
+    exception := Exception,
+    context := Context,
+    stacktrace := Stacktrace
+}) ->
+    %% Re-raise an exception captured earlier with its original stacktrace.
+    erlang:raise(Exception, Context, Stacktrace);
+maybe_raise_exception({shutdown, normal}) ->
+    ok;
+maybe_raise_exception(normal) ->
+    ok;
+maybe_raise_exception(shutdown) ->
+    ok;
+maybe_raise_exception(Reason) ->
+    exit(Reason).
+
+%%--------------------------------------------------------------------
+%% Sys callbacks
+
+system_continue(Parent, _Debug, State) ->
+    ?MODULE:recvloop(Parent, State).
+
+system_terminate(Reason, _Parent, _Debug, State) ->
+    terminate(Reason, State).
+
+system_code_change(State, _Mod, _OldVsn, _Extra) ->
+    {ok, State}.
+
+system_get_state(State) -> {ok, State}.
+
+%%--------------------------------------------------------------------
+%% Handle call
+
+%% `info` and `stats` are answered locally; every other request is
+%% delegated to the channel.
+handle_call(_From, info, State) ->
+    {reply, info(State), State};
+handle_call(_From, stats, State) ->
+    {reply, stats(State), State};
+handle_call(_From, Req, State = #state{channel = Channel}) ->
+    case emqx_channel:handle_call(Req, Channel) of
+        {reply, Reply, NChannel} ->
+            {reply, Reply, State#state{channel = NChannel}};
+        {shutdown, Reason, Reply, NChannel} ->
+            shutdown(Reason, Reply, State#state{channel = NChannel});
+        {shutdown, Reason, Reply, OutPacket, NChannel} ->
+            NState = State#state{channel = NChannel},
+            %% Sending may fail, in which case the result also carries a
+            %% deferred `sock_error` message. We are shutting down anyway, so
+            %% that message is dropped instead of crashing on a `badmatch`.
+            NState2 =
+                case handle_outgoing(OutPacket, NState) of
+                    {ok, SentState} -> SentState;
+                    {ok, _SockError, SentState} -> SentState
+                end,
+            NState3 = graceful_shutdown_transport(Reason, NState2),
+            shutdown(Reason, Reply, NState3)
+    end.
+
+%%--------------------------------------------------------------------
+%% Handle timeout
+
+%% Timer dispatch: the idle timer shuts the connection down, `emit_stats`
+%% publishes channel stats and checks for congestion, `keepalive` is
+%% ignored once the channel is disconnected; everything else (and live
+%% keepalives) goes to the channel.
+handle_timeout(_TRef, idle_timeout, State) ->
+    shutdown(idle_timeout, State);
+handle_timeout(
+    _TRef,
+    emit_stats,
+    State = #state{
+        channel = Channel,
+        socket = Socket
+    }
+) ->
+    ClientId = emqx_channel:info(clientid, Channel),
+    emqx_cm:set_chan_stats(ClientId, stats(State)),
+    emqx_congestion:maybe_alarm_conn_congestion(Socket, esockd_socket, Channel),
+    %% Clear the slot so that the timer gets re-armed on the next message.
+    {ok, State#state{stats_timer = undefined}};
+handle_timeout(
+    TRef,
+    keepalive,
+    State = #state{
+        channel = Channel
+    }
+) ->
+    ConnState = emqx_channel:info(conn_state, Channel),
+    case ConnState of
+        disconnected ->
+            {ok, State};
+        _ ->
+            with_channel(handle_timeout, [TRef, keepalive], State)
+    end;
+handle_timeout(TRef, Msg, State) ->
+    with_channel(handle_timeout, [TRef, Msg], State).
+
+%% Report channel stats if the client id is already known.
+try_set_chan_stats(State = #state{channel = Channel}) ->
+    case emqx_channel:info(clientid, Channel) of
+        %% ClientID is not yet known, nothing to report.
+        undefined -> false;
+        ClientId -> emqx_cm:set_chan_stats(ClientId, stats(State))
+    end.
+
+%%--------------------------------------------------------------------
+%% Parse incoming data
+
+%% Account for the received bytes, parse them into packets and turn the
+%% packets into `incoming` messages. When `RequestMore` is `true` and the
+%% socket is not closed, also issue the next asynchronous read, sized by
+%% what the parser reported.
+handle_data(
+    Data,
+    RequestMore,
+    State0 = #state{
+        socket = Socket,
+        sockstate = SS,
+        gc_tracker = {ActiveN, {Pubs, Bytes}, Out}
+    }
+) ->
+    Oct = iolist_size(Data),
+    emqx_metrics:inc('bytes.received', Oct),
+    ?LOG(debug, #{
+        msg => "raw_bin_received",
+        size => Oct,
+        bin => binary_to_list(binary:encode_hex(Data)),
+        type => "hex"
+    }),
+    {More, N, Packets, State1} = parse_incoming(Data, State0),
+    Tracker = {ActiveN, {Pubs + N, Bytes + Oct}, Out},
+    State = State1#state{gc_tracker = Tracker},
+    Msgs = next_incoming_msgs(Packets),
+    case RequestMore =:= true andalso SS =/= closed of
+        true ->
+            request_more_data(Socket, More, Msgs, State);
+        false ->
+            {ok, Msgs, State}
+    end.
+
+-compile({inline, [request_more_data/4]}).
+%% Ask the socket for `More` bytes without blocking, and append to the
+%% already accumulated messages `Acc` whatever needs to be handled next.
+request_more_data(Socket, More, Acc, State) ->
+    case sock_async_recv(Socket, More) of
+        {ok, DataMore} ->
+            %% Got data right away: handle it, then keep reading.
+            {ok, [Acc, {recv_more, DataMore}], State};
+        {select, {_Info, DataMore}} ->
+            %% Partial data, a select is pending: handle it, don't read again.
+            {ok, [Acc, {recv, DataMore}], State};
+        {select, _Info} ->
+            {ok, Acc, State};
+        {error, {closed, DataMore}} ->
+            ClosedState = socket_closed(State),
+            {ok, [Acc, {recv, DataMore}, {sock_closed, tcp_closed}], ClosedState};
+        {error, closed} ->
+            ClosedState = socket_closed(State),
+            {ok, [Acc, {sock_closed, tcp_closed}], ClosedState};
+        {error, {Reason, DataMore}} ->
+            {ok, [Acc, {recv, DataMore}, {sock_error, Reason}], State};
+        {error, Reason} ->
+            {ok, [Acc, {sock_error, Reason}], State}
+    end.
+
+%% @doc: return a reversed Msg list
+%% A single packet yields a bare `{incoming, Packet}` message rather than
+%% a list.
+-compile({inline, [next_incoming_msgs/1]}).
+next_incoming_msgs([Packet]) ->
+    {incoming, Packet};
+next_incoming_msgs(Packets) ->
+    lists:reverse([{incoming, Packet} || Packet <- Packets]).
+
+%% Run the parser over `Data`. Parse failures are logged and reported as a
+%% single `frame_error` pseudo-packet with zero counts.
+parse_incoming(Data, State = #state{parser = Parser}) ->
+    try
+        run_parser(Data, Parser, State)
+    catch
+        throw:{?FRAME_PARSE_ERROR, Reason} ->
+            ?LOG(info, #{
+                msg => "frame_parse_error",
+                reason => Reason,
+                at_state => describe_parser_state(Parser),
+                input_bytes => Data
+            }),
+            ErrState = update_state_on_parse_error(Reason, State),
+            {0, 0, [{frame_error, Reason}], ErrState};
+        error:Reason:Stacktrace ->
+            ?LOG(error, #{
+                msg => "frame_parse_failed",
+                at_state => describe_parser_state(Parser),
+                input_bytes => Data,
+                reason => Reason,
+                stacktrace => Stacktrace
+            }),
+            {0, 0, [{frame_error, Reason}], State}
+    end.
+
+init_parser(FrameOpts) ->
+    %% Go with regular streaming parser.
+    emqx_frame:initial_parse_state(FrameOpts).
+
+%% When the parse error carries the protocol version and a parser state,
+%% adopt both; otherwise leave the state untouched.
+update_state_on_parse_error(#{proto_ver := ProtoVer, parse_state := ParseState}, State) ->
+    State#state{
+        serialize = emqx_frame:serialize_opts(ProtoVer, ?MAX_PACKET_SIZE),
+        parser = ParseState
+    };
+update_state_on_parse_error(_, State) ->
+    State.
+
+%% Parse `Data` with the stream parser. Returns
+%% `{More, NPackets, Packets, State}` where `More` is the parser's hint for
+%% the next read and `Packets` holds the most recently parsed packet first.
+run_parser(Data, ParseState, State) ->
+    run_stream_parser(Data, [], 0, ParseState, State).
+
+-compile({inline, [run_stream_parser/5]}).
+run_stream_parser(<<>>, Acc, N, NParseState, State) ->
+    {0, N, Acc, State#state{parser = NParseState}};
+run_stream_parser(Data, Acc, N, ParseState, State) ->
+    case emqx_frame:parse(Data, ParseState) of
+        {Packet, Rest, NParseState} ->
+            run_stream_parser(Rest, [Packet | Acc], N + 1, NParseState, State);
+        {More, NParseState} ->
+            {More, N, Acc, State#state{parser = NParseState}}
+    end.
+
+describe_parser_state(ParseState) ->
+    emqx_frame:describe_state(ParseState).
+
+%%--------------------------------------------------------------------
+%% Handle incoming packet
+
+handle_incoming(Packet = ?PACKET(Type), State) ->
+    inc_incoming_stats(Packet),
+    case Type of
+        ?CONNECT ->
+            %% CONNECT packet is fully received, time to cancel idle timer.
+            ok = cancel_idle_timer(State),
+            NState = State#state{
+                serialize = emqx_frame:serialize_opts(Packet#mqtt_packet.variable),
+                stats_timer = init_stats_timer(State)
+            },
+            with_channel(handle_in, [Packet], NState);
+        _ ->
+            with_channel(handle_in, [Packet], State)
+    end;
+handle_incoming(FrameError, State) ->
+    with_channel(handle_in, [FrameError], State).
+
+%%--------------------------------------------------------------------
+%% With Channel
+
+%% Call `emqx_channel:Fun(Args..., Channel)` and translate its result into
+%% the `handle_msg/2` return protocol, storing the updated channel.
+with_channel(Fun, Args, State = #state{channel = Channel}) ->
+    case erlang:apply(emqx_channel, Fun, Args ++ [Channel]) of
+        ok ->
+            {ok, State};
+        {ok, NChannel} ->
+            {ok, State#state{channel = NChannel}};
+        {ok, Replies, NChannel} ->
+            {ok, next_msgs(Replies), State#state{channel = NChannel}};
+        {continue, Replies, NChannel} ->
+            %% NOTE: Will later go back to `emqx_channel:handle_info/2`.
+            {ok, [next_msgs(Replies), continue], State#state{channel = NChannel}};
+        {shutdown, Reason, NChannel} ->
+            shutdown(Reason, State#state{channel = NChannel});
+        {shutdown, Reason, Packet, NChannel} ->
+            NState = State#state{channel = NChannel},
+            %% Sending may fail, in which case the result also carries a
+            %% deferred `sock_error` message. We are shutting down anyway, so
+            %% that message is dropped instead of crashing on a `badmatch`.
+            NState2 =
+                case handle_outgoing(Packet, NState) of
+                    {ok, SentState} -> SentState;
+                    {ok, _SockError, SentState} -> SentState
+                end,
+            shutdown(Reason, NState2)
+    end.
+
+%%--------------------------------------------------------------------
+%% Handle outgoing packets
+
+%% Serialize and send one packet or a list of packets; returns whatever
+%% `send/3` returns.
+handle_outgoing(Packets, State = #state{channel = _Channel}) ->
+    Res = do_handle_outgoing(Packets, State),
+    _ = ?EXT_TRACE_OUTGOING_STOP(
+        emqx_external_trace:basic_attrs(_Channel),
+        Packets
+    ),
+    Res.
+
+do_handle_outgoing(Packets, State) when is_list(Packets) ->
+    N = length(Packets),
+    send(N, [serialize_and_inc_stats(State, Packet) || Packet <- Packets], State);
+do_handle_outgoing(Packet, State) ->
+    send(1, serialize_and_inc_stats(State, Packet), State).
+
+%% Serialize a packet and bump the matching metrics. A packet that
+%% serializes to an empty binary is logged and counted as dropped.
+serialize_and_inc_stats(#state{serialize = Serialize}, Packet) ->
+    try emqx_frame:serialize_pkt(Packet, Serialize) of
+        <<>> ->
+            ?LOG(warning, #{
+                msg => "packet_is_discarded",
+                reason => "frame_is_too_large",
+                packet => emqx_packet:format(Packet, hidden)
+            }),
+            emqx_metrics:inc('delivery.dropped.too_large'),
+            emqx_metrics:inc('delivery.dropped'),
+            inc_dropped_stats(),
+            <<>>;
+        Data ->
+            ?TRACE("MQTT", "mqtt_packet_sent", #{packet => Packet}),
+            emqx_metrics:inc_sent(Packet),
+            inc_outgoing_stats(Packet),
+            Data
+    catch
+        %% Maybe Never happen.
+        throw:{?FRAME_SERIALIZE_ERROR, Reason} ->
+            ?LOG(info, #{
+                reason => Reason,
+                input_packet => Packet
+            }),
+            erlang:error({?FRAME_SERIALIZE_ERROR, Reason});
+        error:Reason:Stacktrace ->
+            ?LOG(error, #{
+                input_packet => Packet,
+                exception => Reason,
+                stacktrace => Stacktrace
+            }),
+            erlang:error(?FRAME_SERIALIZE_ERROR)
+    end.
+
+%%--------------------------------------------------------------------
+%% Send data
+
+%% On a failed send the result additionally carries a deferred
+%% `{sock_error, Reason}` message.
+-spec send(non_neg_integer(), iodata(), state()) ->
+    {ok, state()} | {ok, {sock_error, term()}, state()}.
%% Write `IoData` (holding `Num` packets) to the socket.
%% The byte counter is bumped up front. Nothing is written when the socket is
%% already marked closed. Send errors are deferred to the caller as a
%% `{sock_error, Reason}` message.
send(Num, IoData, State = #state{socket = Socket, sockstate = SockState}) ->
    Oct = iolist_size(IoData),
    emqx_metrics:inc('bytes.sent', Oct),
    case SockState of
        closed ->
            {ok, State};
        _ ->
            %% FIXME timeout
            case socket:send(Socket, IoData, 15_000) of
                ok ->
                    Ok = sent(Num, Oct, State),
                    ?BROKER_INSTR_WMARK(t0_deliver, {T0, TDeliver} when is_integer(T0), begin
                        TSent = ?BROKER_INSTR_TS(),
                        ?BROKER_INSTR_OBSERVE_HIST(connection, deliver_delay_us, ?US(TDeliver - T0)),
                        ?BROKER_INSTR_OBSERVE_HIST(connection, deliver_total_lat_us, ?US(TSent - T0))
                    end),
                    Ok;
                {error, {Reason, _Rest}} ->
                    %% Defer error handling:
                    {ok, {sock_error, Reason}, State};
                {error, Reason} ->
                    {ok, {sock_error, Reason}, State}
            end
    end.

%% Account for `Num` packets / `Oct` bytes that were just sent and decide
%% whether it is time to run GC and the OOM check.
sent(_Num, _Oct, State = #state{gc_tracker = {ActiveN, {PktsIn, BytesIn}, _Out}}) when
    PktsIn > ActiveN
->
    %% Run GC and check OOM after certain amount of messages or bytes received.
    trigger_gc(PktsIn, BytesIn, ActiveN, State);
sent(Num, Oct, State = #state{gc_tracker = {ActiveN, In, {PktsOut, BytesOut}}}) ->
    %% Run GC and check OOM after certain amount of messages or bytes sent.
    NPkts = PktsOut + Num,
    NBytes = BytesOut + Oct,
    case NPkts > ActiveN of
        true ->
            trigger_gc(NPkts, NBytes, ActiveN, State);
        false ->
            {ok, State#state{gc_tracker = {ActiveN, In, {NPkts, NBytes}}}}
    end.

%% Reset the GC tracker, run GC with the given counters, then check for OOM.
-compile({inline, [trigger_gc/4]}).
trigger_gc(NPkts, NBytes, ActiveN, State) ->
    Reset = State#state{gc_tracker = init_gc_tracker(ActiveN)},
    Collected = run_gc(NPkts, NBytes, Reset),
    {ok, check_oom(NPkts, NBytes, Collected)}.

%%--------------------------------------------------------------------
%% Handle Info

%% Socket errors are logged (except `closed` and `einval`), the socket is
%% closed and the event is re-dispatched as `{sock_closed, Reason}`.
%% QUIC control stream events go to `emqx_quic_stream`; everything else is
%% passed to the channel.
handle_info({sock_error, Reason}, State) ->
    case Reason of
        closed -> ok;
        einval -> ok;
        _ -> ?SLOG(warning, #{msg => "socket_error", reason => Reason})
    end,
    handle_info({sock_closed, Reason}, close_socket(State));
%% handle QUIC control stream events
handle_info({quic, Event, Handle, Prop}, State) when is_atom(Event) ->
    case emqx_quic_stream:Event(Handle, Prop, State) of
        {{continue, Msgs}, NState} ->
            {ok, Msgs, NState};
        Result ->
            Result
    end;
handle_info(Info, State) ->
    with_channel(handle_info, [Info], State).

%%--------------------------------------------------------------------
%% Handle Cast

%% Apply socket options requested asynchronously; the outcome is only traced
%% and the state is returned unchanged. Unknown casts are traced as errors.
handle_cast(
    {async_set_socket_options, SockOpts},
    State = #state{socket = Socket}
) ->
    case sock_setopts(Socket, SockOpts) of
        ok ->
            ?tp(debug, "custom_socket_options_successfully", #{opts => SockOpts});
        {error, {invalid, {socket_option, SockOpt}}} ->
            ?tp(warning, "unsupported_socket_keepalive", #{option => SockOpt});
        {error, Reason} when Reason =:= closed orelse Reason =:= einval ->
            %% socket is already closed, ignore this error
            ?tp(debug, "socket already closed", #{reason => socket_already_closed}),
            ok;
        Other ->
            %% other errors
            ?tp(error, "failed_to_set_custom_socket_option", #{reason => Other})
    end,
    State;
handle_cast(Req, State) ->
    ?tp(error, "received_unknown_cast", #{cast => Req}),
    State.

%%--------------------------------------------------------------------
%% Run GC and Check OOM

%% Run GC when a GC state is configured and overload protection does not ask
%% to back off; store the updated GC state if GC was attempted.
run_gc(Pubs, Bytes, State = #state{gc_state = GcSt, zone = Zone}) ->
    ShouldRun = ?ENABLED(GcSt) andalso not emqx_olp:backoff_gc(Zone),
    case ShouldRun andalso emqx_gc:run(Pubs, Bytes, GcSt) of
        false -> State;
        {_IsGC, NGcSt} -> State#state{gc_state = NGcSt}
    end.

%% Check the zone's `force_shutdown` policy. Exits the process with
%% `{shutdown, Reason}` when the policy is violated, otherwise returns the
%% state unchanged. Both outcomes emit a trace point.
check_oom(Pubs, Bytes, State = #state{zone = Zone}) ->
    Policy = emqx_config:get_zone_conf(Zone, [force_shutdown]),
    case emqx_utils:check_oom(Policy) of
        {shutdown, Reason} ->
            %% triggers terminate/2 callback immediately
            ?tp(warning, check_oom_shutdown, #{
                policy => Policy,
                incoming_pubs => Pubs,
                incoming_bytes => Bytes,
                shutdown => Reason
            }),
            erlang:exit({shutdown, Reason});
        Result ->
            ?tp(debug, check_oom_ok, #{
                policy => Policy,
                incoming_pubs => Pubs,
                incoming_bytes => Bytes,
                result => Result
            }),
            ok
    end,
    State.

%%--------------------------------------------------------------------
%% Close Socket

%% Close the socket unless the state already says it is closed.
close_socket(#state{sockstate = closed} = State) ->
    State;
close_socket(#state{socket = Socket} = State) ->
    ok = esockd_socket:fast_close(Socket),
    State#state{sockstate = closed}.

%% Only record that the socket is closed, without touching the socket.
socket_closed(State) ->
    State#state{sockstate = closed}.

%%--------------------------------------------------------------------
%% Inc incoming/outgoing stats

%% Count a received packet (and message, for PUBLISH) and update metrics.
-compile({inline, [inc_incoming_stats/1]}).
inc_incoming_stats(?PACKET(Type) = Packet) ->
    inc_counter(recv_pkt, 1),
    case Type of
        ?PUBLISH ->
            inc_counter(recv_msg, 1),
            inc_qos_stats(recv_msg, Packet);
        _ ->
            ok
    end,
    emqx_metrics:inc_recv(Packet).

%% Count a message dropped for being too large.
-compile({inline, [inc_dropped_stats/0]}).
inc_dropped_stats() ->
    inc_counter('send_msg.dropped', 1),
    inc_counter('send_msg.dropped.too_large', 1).

%% Count a sent packet (and message, for PUBLISH).
-compile({inline, [inc_outgoing_stats/1]}).
inc_outgoing_stats(?PACKET(Type) = Packet) ->
    inc_counter(send_pkt, 1),
    case Type of
        ?PUBLISH ->
            inc_counter(send_msg, 1),
            inc_qos_stats(send_msg, Packet);
        _ ->
            ok
    end.

%% Bump the per-QoS counter matching the direction (`send_msg` / `recv_msg`)
%% and the packet's QoS.
-compile({inline, [inc_qos_stats/2]}).
inc_qos_stats(Type, Packet) ->
    case {Type, emqx_packet:qos(Packet)} of
        {send_msg, ?QOS_0} -> inc_counter('send_msg.qos0', 1);
        {send_msg, ?QOS_1} -> inc_counter('send_msg.qos1', 1);
        {send_msg, ?QOS_2} -> inc_counter('send_msg.qos2', 1);
        {recv_msg, ?QOS_0} -> inc_counter('recv_msg.qos0', 1);
        {recv_msg, ?QOS_1} -> inc_counter('recv_msg.qos1', 1);
        {recv_msg, ?QOS_2} -> inc_counter('recv_msg.qos2', 1);
        %% for bad qos
        _ -> ok
    end.

%%--------------------------------------------------------------------
%% Helper functions

%% Normalize channel replies: a packet becomes an `{outgoing, Packet}` message,
%% other tuples and lists are passed through.
-compile({inline, [next_msgs/1]}).
next_msgs(#mqtt_packet{} = Packet) ->
    {outgoing, Packet};
next_msgs(Event) when is_tuple(Event) ->
    Event;
next_msgs(More) when is_list(More) ->
    More.

%% Stop with a `{shutdown, Reason}` reason, optionally with a reply.
-compile({inline, [shutdown/2, shutdown/3]}).
shutdown(Reason, State) ->
    stop({shutdown, Reason}, State).

shutdown(Reason, Reply, State) ->
    stop({shutdown, Reason}, Reply, State).

%% Build the `stop` return tuple.
-compile({inline, [stop/2, stop/3]}).
stop(Reason, State) ->
    {stop, Reason, State}.

stop(Reason, Reply, State) ->
    {stop, Reason, Reply, State}.

%% Increment a process-dictionary counter, discarding the result.
inc_counter(Key, Inc) ->
    _ = emqx_pd:inc_counter(Key, Inc),
    ok.

%% Apply the listener's TCP keepalive setting, if one is configured.
set_tcp_keepalive({tcp, Id}) ->
    case emqx_config:get_listener_conf(tcp, Id, [tcp_options, keepalive], "none") of
        "none" ->
            ok;
        Keepalive ->
            {Idle, Interval, Probes} = emqx_schema:parse_tcp_keepalive(Keepalive),
            async_set_keepalive(self(), Idle, Interval, Probes)
    end.

%% Shut the socket down in both directions (result ignored) and mark it
%% closed.
-spec graceful_shutdown_transport(atom(), state()) -> state().
graceful_shutdown_transport(_Reason, State = #state{socket = Socket}) ->
    _ = socket:shutdown(Socket, read_write),
    State#state{sockstate = closed}.

start_timer(Time, Msg) ->
    emqx_utils:start_timer(Time, Msg).
+ +%%-------------------------------------------------------------------- +%% For CT tests +%%-------------------------------------------------------------------- + +set_field(Name, Value, State) -> + Pos = emqx_utils:index_of(Name, record_info(fields, state)), + setelement(Pos + 1, State, Value). + +get_state(Pid) -> + State = sys:get_state(Pid), + maps:from_list( + lists:zip( + record_info(fields, state), + tl(tuple_to_list(State)) + ) + ). + +get_active_n(Type, Listener) -> + emqx_config:get_listener_conf(Type, Listener, [tcp_options, active_n]). From 28bc810e4355d92b5ea78654ac88490037c21b21 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 30 Jun 2025 18:14:33 +0200 Subject: [PATCH 04/31] test(emqx): make peeking into connection state module-aware --- apps/emqx/test/emqx_cth_broker.erl | 36 +++++++++++++++++++ apps/emqx/test/emqx_listeners_SUITE.erl | 4 +-- .../emqx/test/emqx_listeners_update_SUITE.erl | 9 ++--- .../emqx/test/emqx_mqtt_protocol_v5_SUITE.erl | 18 ++-------- .../test/emqx_persistent_session_SUITE.erl | 3 +- apps/emqx/test/emqx_shared_sub_SUITE.erl | 16 ++++----- .../test/emqx_mgmt_api_clients_SUITE.erl | 7 ++-- 7 files changed, 56 insertions(+), 37 deletions(-) create mode 100644 apps/emqx/test/emqx_cth_broker.erl diff --git a/apps/emqx/test/emqx_cth_broker.erl b/apps/emqx/test/emqx_cth_broker.erl new file mode 100644 index 00000000000..c72a6678fb9 --- /dev/null +++ b/apps/emqx/test/emqx_cth_broker.erl @@ -0,0 +1,36 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023-2025 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- + +-module(emqx_cth_broker). + +-compile(export_all). +-compile(nowarn_export_all). + +-spec connection_info(_Info, pid() | emqx_types:clientid()) -> _Value. 
connection_info(Info, Client) ->
    %% Accept either an `emqtt` client pid or a client id.
    ClientId = client_id(Client),
    [ChanPid] = emqx_cm:lookup_channels(ClientId),
    ConnMod = emqx_cm:do_get_chann_conn_mod(ClientId, ChanPid),
    get_connection_info(Info, ConnMod, sys:get_state(ChanPid)).

%% Return the connection state of the client's only channel, as reported by
%% the connection module's own `get_state/1`.
-spec connection_state(pid() | emqx_types:clientid()) -> _Value.
connection_state(Client) ->
    ClientId = client_id(Client),
    [ChanPid] = emqx_cm:lookup_channels(ClientId),
    ConnMod = emqx_cm:do_get_chann_conn_mod(ClientId, ChanPid),
    ConnMod:get_state(ChanPid).

%% Resolve an `emqtt` client pid to its client id; anything else is taken to
%% be a client id already.
client_id(Client) when is_pid(Client) ->
    emqtt_info(clientid, Client);
client_id(ClientId) ->
    ClientId.

%% Extract `Info` from a raw process state according to the connection module
%% that owns it. `connmod` answers with the module itself.
get_connection_info(connmod, ConnMod, _State) ->
    ConnMod;
get_connection_info(Info, emqx_connection, ConnState) ->
    emqx_connection:info(Info, ConnState);
get_connection_info(Info, emqx_socket_connection, ConnState) ->
    emqx_socket_connection:info(Info, ConnState);
get_connection_info(Info, emqx_ws_connection, {_WSState, ConnState, _}) ->
    %% The connection state is the second element of the process state tuple.
    emqx_ws_connection:info(Info, ConnState).

%% Look up a single property of an `emqtt` client, `undefined` when absent.
emqtt_info(Key, Client) ->
    Props = emqtt:info(Client),
    proplists:get_value(Key, Props, undefined).
diff --git a/apps/emqx/test/emqx_listeners_SUITE.erl b/apps/emqx/test/emqx_listeners_SUITE.erl index b245038d572..ec447696954 100644 --- a/apps/emqx/test/emqx_listeners_SUITE.erl +++ b/apps/emqx/test/emqx_listeners_SUITE.erl @@ -181,9 +181,7 @@ t_tcp_frame_parsing_conn(_Config) -> with_listener(tcp, ?FUNCTION_NAME, Conf, fun() -> Client = emqtt_connect_tcp({127, 0, 0, 1}, Port), pong = emqtt:ping(Client), - ClientId = proplists:get_value(clientid, emqtt:info(Client)), - [CPid] = emqx_cm:lookup_channels(ClientId), - CState = emqx_connection:get_state(CPid), + CState = emqx_cth_broker:connection_state(Client), ?assertMatch(#{listener := {tcp, ?FUNCTION_NAME}}, CState), emqx_listeners:is_packet_parser_available(mqtt) andalso ?assertMatch(#{parser := {frame, _Options}}, CState) diff --git a/apps/emqx/test/emqx_listeners_update_SUITE.erl b/apps/emqx/test/emqx_listeners_update_SUITE.erl index 933660144af..a747e2e2717 100644 --- a/apps/emqx/test/emqx_listeners_update_SUITE.erl +++ b/apps/emqx/test/emqx_listeners_update_SUITE.erl @@ -235,7 +235,7 @@ test_change_parse_unit(ConfPath, ClientOpts) -> ?assertMatch({ok, _}, emqx:update_config(ConfPath, {update, ListenerRawConf1})), Client1 = emqtt_connect(ClientOpts), pong = emqtt:ping(Client1), - CState1 = get_conn_state(Client1), + CState1 = emqx_cth_broker:connection_state(Client1), emqx_listeners:is_packet_parser_available(mqtt) andalso ?assertMatch( #{parser := {frame, _Options}}, @@ -245,7 +245,7 @@ test_change_parse_unit(ConfPath, ClientOpts) -> ?assertMatch({ok, _}, emqx:update_config(ConfPath, {update, ListenerRawConf0})), Client2 = emqtt_connect(ClientOpts), pong = emqtt:ping(Client2), - CState2 = get_conn_state(Client2), + CState2 = emqx_cth_broker:connection_state(Client2), emqx_listeners:is_packet_parser_available(mqtt) andalso ?assertMatch( #{parser := Parser} when Parser =/= map_get(parser, CState1), @@ -424,8 +424,3 @@ emqtt_connect(Opts) -> {error, Reason} -> error(Reason, [Opts]) end. 
- -get_conn_state(Client) -> - ClientId = proplists:get_value(clientid, emqtt:info(Client)), - [CPid | _] = emqx_cm:lookup_channels(ClientId), - emqx_connection:get_state(CPid). diff --git a/apps/emqx/test/emqx_mqtt_protocol_v5_SUITE.erl b/apps/emqx/test/emqx_mqtt_protocol_v5_SUITE.erl index a7685816e74..0a24280457e 100644 --- a/apps/emqx/test/emqx_mqtt_protocol_v5_SUITE.erl +++ b/apps/emqx/test/emqx_mqtt_protocol_v5_SUITE.erl @@ -100,16 +100,6 @@ end_per_testcase(TestCase, Config) -> client_info(Key, Client) -> proplists:get_value(Key, emqtt:info(Client), undefined). -connection_info(Info, ClientPid, Config) when is_list(Config) -> - connection_info(Info, ClientPid, ?config(conn_type, Config)); -connection_info(Info, ClientPid, tcp) -> - emqx_connection:info(Info, sys:get_state(ClientPid)); -connection_info(Info, ClientPid, quic) -> - emqx_connection:info(Info, sys:get_state(ClientPid)); -connection_info(Info, ClientPid, ws) -> - {_WSState, ConnState, _} = sys:get_state(ClientPid), - emqx_ws_connection:info(Info, ConnState). - receive_messages(Count) -> receive_messages(Count, []). @@ -283,8 +273,7 @@ t_connect_will_message(Config) -> | Config ]), {ok, _} = emqtt:ConnFun(Client1), - [ClientPid] = emqx_cm:lookup_channels(client_info(clientid, Client1)), - WillMsg = connection_info({channel, will_msg}, ClientPid, Config), + WillMsg = emqx_cth_broker:connection_info({channel, will_msg}, Client1), %% [MQTT-3.1.2-7] ?assertNotEqual(undefined, WillMsg), @@ -468,13 +457,12 @@ t_connect_emit_stats_timeout(Config) -> {ok, _} = emqtt:ConnFun(Client), %% Poke the connection to ensure stats timer is armed. 
pong = emqtt:ping(Client), - [ClientPid] = emqx_cm:lookup_channels(client_info(clientid, Client)), ?assertMatch( TRef when is_reference(TRef), - connection_info(stats_timer, ClientPid, Config) + emqx_cth_broker:connection_info(stats_timer, Client) ), ?block_until(#{?snk_kind := cancel_stats_timer}, IdleTimeout * 2, _BackInTime = 0), - ?assertEqual(undefined, connection_info(stats_timer, ClientPid, Config)), + ?assertEqual(undefined, emqx_cth_broker:connection_info(stats_timer, Client)), ok = emqtt:disconnect(Client). %% [MQTT-3.1.2-22] diff --git a/apps/emqx/test/emqx_persistent_session_SUITE.erl b/apps/emqx/test/emqx_persistent_session_SUITE.erl index 2c086130eb6..f1f221bebbd 100644 --- a/apps/emqx/test/emqx_persistent_session_SUITE.erl +++ b/apps/emqx/test/emqx_persistent_session_SUITE.erl @@ -355,13 +355,12 @@ t_choose_impl(Config) -> | Config ]), {ok, _} = emqtt:ConnFun(Client), - [ChanPid] = emqx_cm:lookup_channels(ClientId), ?assertEqual( case ?config(persistence, Config) of false -> emqx_session_mem; ds -> emqx_persistent_session_ds end, - emqx_connection:info({channel, {session, impl}}, sys:get_state(ChanPid)) + emqx_cth_broker:connection_info({channel, {session, impl}}, ClientId) ), ok = emqtt:disconnect(Client). 
diff --git a/apps/emqx/test/emqx_shared_sub_SUITE.erl b/apps/emqx/test/emqx_shared_sub_SUITE.erl index 64076ac8dfb..1e1d4d353aa 100644 --- a/apps/emqx/test/emqx_shared_sub_SUITE.erl +++ b/apps/emqx/test/emqx_shared_sub_SUITE.erl @@ -792,16 +792,16 @@ t_qos1_random_dispatch_if_all_members_are_down(Config) when is_list(Config) -> [Pid1, Pid2] = emqx_shared_sub:subscribers(Group, Topic), ?assert(is_process_alive(Pid1)), ?assert(is_process_alive(Pid2)), - ?retry(100, 10, ?assertEqual(disconnected, get_channel_info(conn_state, Pid1))), - ?retry(100, 10, ?assertEqual(disconnected, get_channel_info(conn_state, Pid2))), + ?retry(100, 10, ?assertEqual(disconnected, get_channel_info(conn_state, ClientId1))), + ?retry(100, 10, ?assertEqual(disconnected, get_channel_info(conn_state, ClientId2))), {ok, _} = emqtt:publish(ConnPub, Topic, <<"hello11">>, 1), ?retry( 100, 10, begin - Msgs1 = emqx_mqueue:to_list(get_mqueue(Pid1)), - Msgs2 = emqx_mqueue:to_list(get_mqueue(Pid2)), + Msgs1 = emqx_mqueue:to_list(get_mqueue(ClientId1)), + Msgs2 = emqx_mqueue:to_list(get_mqueue(ClientId2)), %% assert the message is in mqueue (because socket is closed) ?assertMatch([#message{payload = <<"hello11">>}], Msgs1 ++ Msgs2) end @@ -809,11 +809,11 @@ t_qos1_random_dispatch_if_all_members_are_down(Config) when is_list(Config) -> emqtt:stop(ConnPub), ok. -get_mqueue(ConnPid) -> - get_channel_info({session, mqueue}, ConnPid). +get_mqueue(Client) -> + get_channel_info({session, mqueue}, Client). -get_channel_info(Info, ConnPid) -> - emqx_connection:info({channel, Info}, sys:get_state(ConnPid)). +get_channel_info(Info, Client) -> + emqx_cth_broker:connection_info({channel, Info}, Client). 
%% No ack, QoS 2 subscriptions, %% client1 receives one message, send pubrec, then suspend diff --git a/apps/emqx_management/test/emqx_mgmt_api_clients_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_clients_SUITE.erl index d21cea86343..0c1488faf52 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_clients_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_clients_SUITE.erl @@ -1090,10 +1090,13 @@ t_keepalive(Config) -> {ok, _} = emqtt:connect(C1), [Pid] = emqx_cm:lookup_channels(list_to_binary(ClientId)), %% will reset to max keepalive if keepalive > max keepalive - #{conninfo := #{keepalive := InitKeepalive}} = emqx_connection:info(Pid), + ?assertMatch( + #{conninfo := #{keepalive := InitKeepalive}}, + emqx_cm:get_chan_info(list_to_binary(ClientId)) + ), ?assertMatch( #{max_idle_millisecond := 65536500}, - emqx_connection:info({channel, keepalive}, sys:get_state(Pid)) + emqx_cth_broker:connection_info({channel, keepalive}, list_to_binary(ClientId)) ), ?assertMatch( From 3a6b0206322160ba790c6f6180d3be4cf7c1f925 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 1 Jul 2025 13:22:49 +0200 Subject: [PATCH 05/31] test(emqx): adapt PP2 mocks to `esockd_socket` listeners --- apps/emqx/test/emqx_cth_listener.erl | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/apps/emqx/test/emqx_cth_listener.erl b/apps/emqx/test/emqx_cth_listener.erl index d9ade266212..f0266a8c285 100644 --- a/apps/emqx/test/emqx_cth_listener.erl +++ b/apps/emqx/test/emqx_cth_listener.erl @@ -91,11 +91,11 @@ meck_recv_ppv2(tcp) -> ok = meck:expect( esockd_proxy_protocol, recv, - fun(_Transport, Socket, _Timeout) -> + fun(Transport, Socket, _Timeout) -> SNI = persistent_term:get(current_client_sni, undefined), - {ok, {SrcAddr, SrcPort}} = esockd_transport:peername(Socket), - {ok, {DstAddr, DstPort}} = esockd_transport:sockname(Socket), - {ok, #proxy_socket{ + {ok, {SrcAddr, SrcPort}} = Transport:peername(Socket), + {ok, {DstAddr, DstPort}} = 
Transport:sockname(Socket), + ProxySocket = #proxy_socket{ inet = inet4, socket = Socket, src_addr = SrcAddr, @@ -103,7 +103,16 @@ meck_recv_ppv2(tcp) -> src_port = SrcPort, dst_port = DstPort, pp2_additional_info = [{pp2_authority, SNI}] - }} + }, + %% See `esockd_proxy_protocol:recv/3` implementation. + case Transport of + esockd_transport -> + {ok, ProxySocket}; + esockd_socket -> + ProxyAttrs = esockd_proxy_protocol:get_proxy_attrs(ProxySocket), + ok = socket:setopt(Socket, {otp, meta}, ProxyAttrs), + {ok, Socket} + end end ); meck_recv_ppv2(ws) -> From aad4e5afe05951144fbed6403c6f2198eee09ed3 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 1 Jul 2025 14:46:33 +0200 Subject: [PATCH 06/31] fix(sessds): make `get_session_state/1` connmod-aware --- apps/emqx/src/emqx_persistent_session_ds.erl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/apps/emqx/src/emqx_persistent_session_ds.erl b/apps/emqx/src/emqx_persistent_session_ds.erl index d005eb3c4bf..be39b72ec9c 100644 --- a/apps/emqx/src/emqx_persistent_session_ds.erl +++ b/apps/emqx/src/emqx_persistent_session_ds.erl @@ -1474,10 +1474,11 @@ try_get_live_session(ClientID) -> case emqx_cm:lookup_channels(local, ClientID) of [Pid] -> try - #{channel := ChanState} = emqx_connection:get_state(Pid), - case emqx_channel:info(impl, ChanState) of + ConnMod = emqx_cm:do_get_chann_conn_mod(ClientID, Pid), + ConnState = sys:get_state(Pid), + case apply(ConnMod, info, [{channel, impl}, ConnState]) of ?MODULE -> - {Pid, emqx_channel:info(session_state, ChanState)}; + {Pid, apply(ConnMod, info, [{channel, session_state}, ConnState])}; _ -> not_persistent end From e6edd2521a221c350e5e598f880b946f06a1ff60 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 1 Jul 2025 16:30:09 +0200 Subject: [PATCH 07/31] fix(eviction): make aware of new connection module --- apps/emqx_eviction_agent/mix.exs | 2 +- apps/emqx_eviction_agent/src/emqx_eviction_agent.erl | 6 +++++- 2 files changed, 6 
insertions(+), 2 deletions(-) diff --git a/apps/emqx_eviction_agent/mix.exs b/apps/emqx_eviction_agent/mix.exs index d9959dc2133..7d09eecf41e 100644 --- a/apps/emqx_eviction_agent/mix.exs +++ b/apps/emqx_eviction_agent/mix.exs @@ -5,7 +5,7 @@ defmodule EMQXEvictionAgent.MixProject do def project do [ app: :emqx_eviction_agent, - version: "5.1.11", + version: "5.1.12", build_path: "../../_build", erlc_options: UMP.erlc_options(), erlc_paths: UMP.erlc_paths(), diff --git a/apps/emqx_eviction_agent/src/emqx_eviction_agent.erl b/apps/emqx_eviction_agent/src/emqx_eviction_agent.erl index edd896a7424..b518349d66e 100644 --- a/apps/emqx_eviction_agent/src/emqx_eviction_agent.erl +++ b/apps/emqx_eviction_agent/src/emqx_eviction_agent.erl @@ -61,7 +61,11 @@ -export_type([server_reference/0, kind/0, options/0]). -define(CONN_MODULES, [ - emqx_connection, emqx_ws_connection, emqx_quic_connection, emqx_eviction_agent_channel + emqx_connection, + emqx_socket_connection, + emqx_ws_connection, + emqx_quic_connection, + emqx_eviction_agent_channel ]). %%-------------------------------------------------------------------- From 0761af5e6894671220aebf2d23b899403e7d114d Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 2 Jul 2025 14:05:01 +0200 Subject: [PATCH 08/31] test(emqx): cleanup `emqx_client_SUITE` and unbreak 2 testcases --- apps/emqx/test/emqx_client_SUITE.erl | 118 ++++++++------------------- 1 file changed, 33 insertions(+), 85 deletions(-) diff --git a/apps/emqx/test/emqx_client_SUITE.erl b/apps/emqx/test/emqx_client_SUITE.erl index ef65f9f8d02..6949194e8c8 100644 --- a/apps/emqx/test/emqx_client_SUITE.erl +++ b/apps/emqx/test/emqx_client_SUITE.erl @@ -7,8 +7,6 @@ -compile(export_all). -compile(nowarn_export_all). --import(lists, [nth/2]). - -include_lib("emqx/include/emqx_mqtt.hrl"). -include_lib("emqx/include/emqx_hooks.hrl"). -include_lib("emqx/include/asserts.hrl"). @@ -16,24 +14,6 @@ -include_lib("common_test/include/ct.hrl"). 
-include_lib("snabbkaffe/include/snabbkaffe.hrl"). --define(TOPICS, [ - <<"TopicA">>, - <<"TopicA/B">>, - <<"Topic/C">>, - <<"TopicA/C">>, - <<"/TopicA">> -]). - --define(WILD_TOPICS, [ - <<"TopicA/+">>, - <<"+/C">>, - <<"#">>, - <<"/#">>, - <<"/+">>, - <<"+/+">>, - <<"TopicA/#">> -]). - -define(WAIT(EXPR, ATTEMPTS), ?retry(1000, ATTEMPTS, EXPR)). all() -> @@ -46,22 +26,21 @@ all() -> groups() -> [ - {mqttv3, [non_parallel_tests], [t_basic_v3]}, - {mqttv4, [non_parallel_tests], [ + {mqttv3, [], [t_basic_v3]}, + {mqttv4, [], [ t_basic_v4, t_cm, t_cm_registry, %% t_will_message, - %% t_offline_message_queueing, + t_offline_message_queueing, t_overlapping_subscriptions, %% t_keepalive, - %% t_redelivery_on_reconnect, - %% subscribe_failure_test, + t_redelivery_on_reconnect, t_dollar_topics, t_sub_non_utf8_topic ]}, - {mqttv5, [non_parallel_tests], [t_basic_with_props_v5, t_v5_receive_maximim_in_connack]}, - {others, [non_parallel_tests], [ + {mqttv5, [], [t_basic_with_props_v5, t_v5_receive_maximim_in_connack]}, + {others, [], [ t_username_as_clientid, t_certcn_as_alias, t_certdn_as_alias, @@ -136,15 +115,7 @@ t_idle_timeout_infinity(_) -> {ok, C} = emqtt:start_link([{clientid, ClientId}]), {ok, _} = emqtt:connect(C), ?WAIT(#{clientinfo := #{clientid := ClientId}} = emqx_cm:get_chan_info(ClientId), 2), - emqtt:subscribe(C, <<"mytopic">>, 0), - ?WAIT( - begin - Stats = emqx_cm:get_chan_stats(ClientId), - ?assertEqual(1, proplists:get_value(subscriptions_cnt, Stats)) - end, - 2 - ), - ok. + {ok, _, [0]} = emqtt:subscribe(C, <<"mytopic">>, 0). t_cm_registry(_) -> Children = supervisor:which_children(emqx_cm_sup), @@ -154,9 +125,10 @@ t_cm_registry(_) -> Pid ! <<"Unexpected info">>. 
t_will_message(_Config) -> + WillTopic = <<"TopicA/C">>, {ok, C1} = emqtt:start_link([ {clean_start, true}, - {will_topic, nth(3, ?TOPICS)}, + {will_topic, WillTopic}, {will_payload, <<"client disconnected">>}, {keepalive, 1} ]), @@ -165,13 +137,10 @@ t_will_message(_Config) -> {ok, C2} = emqtt:start_link(), {ok, _} = emqtt:connect(C2), - {ok, _, [2]} = emqtt:subscribe(C2, nth(3, ?TOPICS), 2), - timer:sleep(5), + {ok, _, [2]} = emqtt:subscribe(C2, WillTopic, 2), ok = emqtt:stop(C1), - timer:sleep(5), ?assertEqual(1, length(recv_msgs(1))), - ok = emqtt:disconnect(C2), - ct:pal("Will message test succeeded"). + ok = emqtt:disconnect(C2). t_offline_message_queueing(_) -> {ok, C1} = emqtt:start_link([ @@ -179,39 +148,36 @@ t_offline_message_queueing(_) -> {clientid, <<"c1">>} ]), {ok, _} = emqtt:connect(C1), - - {ok, _, [2]} = emqtt:subscribe(C1, nth(6, ?WILD_TOPICS), 2), + {ok, _, [2]} = emqtt:subscribe(C1, <<"+/+">>, 2), ok = emqtt:disconnect(C1), + {ok, C2} = emqtt:start_link([ {clean_start, true}, {clientid, <<"c2">>} ]), {ok, _} = emqtt:connect(C2), - ok = emqtt:publish(C2, nth(2, ?TOPICS), <<"qos 0">>, 0), - {ok, _} = emqtt:publish(C2, nth(3, ?TOPICS), <<"qos 1">>, 1), - {ok, _} = emqtt:publish(C2, nth(4, ?TOPICS), <<"qos 2">>, 2), + ok = emqtt:publish(C2, <<"TopicA/B">>, <<"qos 0">>, 0), + {ok, _} = emqtt:publish(C2, <<"Topic/C">>, <<"qos 1">>, 1), + {ok, _} = emqtt:publish(C2, <<"TopicA/C">>, <<"qos 2">>, 2), timer:sleep(10), emqtt:disconnect(C2), + {ok, C3} = emqtt:start_link([{clean_start, false}, {clientid, <<"c1">>}]), {ok, _} = emqtt:connect(C3), - - timer:sleep(10), - emqtt:disconnect(C3), - ?assertEqual(3, length(recv_msgs(3))). + ?assertEqual(3, length(recv_msgs(3))), + ok = emqtt:disconnect(C3). 
t_overlapping_subscriptions(_) -> {ok, C} = emqtt:start_link([]), {ok, _} = emqtt:connect(C), {ok, _, [2, 1]} = emqtt:subscribe(C, [ - {nth(7, ?WILD_TOPICS), 2}, - {nth(1, ?WILD_TOPICS), 1} + {<<"TopicA/#">>, 2}, + {<<"TopicA/+">>, 1} ]), timer:sleep(10), - {ok, _} = emqtt:publish(C, nth(4, ?TOPICS), <<"overlapping topic filters">>, 2), - timer:sleep(10), - + {ok, _} = emqtt:publish(C, <<"TopicA/C">>, <<"overlapping topic filters">>, 2), Num = length(recv_msgs(2)), ?assert(lists:member(Num, [1, 2])), if @@ -247,22 +213,20 @@ t_overlapping_subscriptions(_) -> %% ct:print("Keepalive test succeeded"). t_redelivery_on_reconnect(_) -> - ct:pal("Redelivery on reconnect test starting"), {ok, C1} = emqtt:start_link([{clean_start, false}, {clientid, <<"c">>}]), {ok, _} = emqtt:connect(C1), - - {ok, _, [2]} = emqtt:subscribe(C1, nth(7, ?WILD_TOPICS), 2), + {ok, _, [2]} = emqtt:subscribe(C1, <<"TopicA/#">>, 2), timer:sleep(10), ok = emqtt:pause(C1), {ok, _} = emqtt:publish( C1, - nth(2, ?TOPICS), + <<"TopicA/B">>, <<>>, [{qos, 1}, {retain, false}] ), {ok, _} = emqtt:publish( C1, - nth(4, ?TOPICS), + <<"TopicA/C">>, <<>>, [{qos, 2}, {retain, false}] ), @@ -271,37 +235,24 @@ t_redelivery_on_reconnect(_) -> ?assertEqual(0, length(recv_msgs(2))), {ok, C2} = emqtt:start_link([{clean_start, false}, {clientid, <<"c">>}]), {ok, _} = emqtt:connect(C2), - - timer:sleep(10), - ok = emqtt:disconnect(C2), - ?assertEqual(2, length(recv_msgs(2))). - -%% t_subscribe_sys_topics(_) -> -%% ct:print("Subscribe failure test starting"), -%% {ok, C, _} = emqtt:start_link([]), -%% {ok, _, [2]} = emqtt:subscribe(C, <<"$SYS/#">>, 2), -%% timer:sleep(10), -%% ct:print("Subscribe failure test succeeded"). + ?assertEqual(2, length(recv_msgs(2))), + ok = emqtt:disconnect(C2). 
t_dollar_topics(_) -> - ct:pal("$ topics test starting"), {ok, C} = emqtt:start_link([ {clean_start, true}, {keepalive, 0} ]), {ok, _} = emqtt:connect(C), - - {ok, _, [1]} = emqtt:subscribe(C, nth(6, ?WILD_TOPICS), 1), + {ok, _, [1]} = emqtt:subscribe(C, <<"+/+">>, 1), {ok, _} = emqtt:publish( C, - <<<<"$">>/binary, (nth(2, ?TOPICS))/binary>>, + <<"$TopicA/B">>, <<"test">>, [{qos, 1}, {retain, false}] ), - timer:sleep(10), ?assertEqual(0, length(recv_msgs(1))), - ok = emqtt:disconnect(C), - ct:pal("$ topics test succeeded"). + ok = emqtt:disconnect(C). t_sub_non_utf8_topic(_) -> {ok, Socket} = gen_tcp:connect({127, 0, 0, 1}, 1883, [{active, true}, binary]), @@ -359,7 +310,7 @@ t_v5_receive_maximim_in_connack(_) -> %%-------------------------------------------------------------------- run_basic(Opts) -> - Topic = nth(1, ?TOPICS), + Topic = <<"TopicA">>, {ok, C} = emqtt:start_link(Opts), {ok, _} = emqtt:connect(C), {ok, _, [1]} = emqtt:subscribe(C, Topic, qos1), @@ -577,11 +528,8 @@ recv_msgs(0, Msgs) -> recv_msgs(Count, Msgs) -> receive {publish, Msg} -> - recv_msgs(Count - 1, [Msg | Msgs]); - %%TODO:: remove the branch? - _Other -> - recv_msgs(Count, Msgs) - after 100 -> + recv_msgs(Count - 1, [Msg | Msgs]) + after 1000 -> Msgs end. From 1f9dc79b0d8d720796daaf89715ec1935e3f228a Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 2 Jul 2025 20:29:58 +0200 Subject: [PATCH 09/31] feat(congestion): make module generic and connmod-aware --- apps/emqx/src/emqx_congestion.erl | 139 ++++++++++------------- apps/emqx/src/emqx_connection.erl | 46 +++++--- apps/emqx/src/emqx_socket_connection.erl | 54 ++++++--- apps/emqx/test/emqx_connection_SUITE.erl | 12 +- 4 files changed, 132 insertions(+), 119 deletions(-) diff --git a/apps/emqx/src/emqx_congestion.erl b/apps/emqx/src/emqx_congestion.erl index 0a5176bb1bf..8750d0d4991 100644 --- a/apps/emqx/src/emqx_congestion.erl +++ b/apps/emqx/src/emqx_congestion.erl @@ -5,29 +5,12 @@ -module(emqx_congestion). 
-export([ - maybe_alarm_conn_congestion/3, - cancel_alarms/3 + maybe_alarm_conn_congestion/2, + cancel_alarms/2 ]). -elvis([{elvis_style, invalid_dynamic_call, #{ignore => [emqx_congestion]}}]). --define(ALARM_CONN_CONGEST(Channel, Reason), - list_to_binary( - io_lib:format( - "~ts/~ts/~ts", - [ - Reason, - emqx_channel:info(clientid, Channel), - maps:get( - username, - emqx_channel:info(clientinfo, Channel), - <<"unknown_user">> - ) - ] - ) - ) -). - -define(ALARM_CONN_INFO_KEYS, [ socktype, sockname, @@ -45,73 +28,68 @@ -define(ALARM_SENT(REASON), {alarm_sent, REASON}). -define(ALL_ALARM_REASONS, [conn_congestion]). -maybe_alarm_conn_congestion(Socket, Transport, Channel) -> - case is_alarm_enabled(Channel) of - false -> - ok; - true -> - case is_tcp_congested(Socket, Transport) of - true -> alarm_congestion(Socket, Transport, Channel, conn_congestion); - false -> cancel_alarm_congestion(Socket, Transport, Channel, conn_congestion) - end +maybe_alarm_conn_congestion(ConnMod, State) -> + Zone = ConnMod:info({channel, zone}, State), + Opts = emqx_config:get_zone_conf(Zone, [conn_congestion]), + case Opts of + #{enable_alarm := true} -> + case is_tcp_congested(ConnMod, State) of + true -> alarm_congestion(ConnMod, State, conn_congestion); + false -> cancel_alarm_congestion(ConnMod, State, conn_congestion, Opts) + end; + #{enable_alarm := false} -> + ok end. -cancel_alarms(Socket, Transport, Channel) -> +cancel_alarms(ConnMod, State) -> lists:foreach( fun(Reason) -> case has_alarm_sent(Reason) of - true -> do_cancel_alarm_congestion(Socket, Transport, Channel, Reason); + true -> do_cancel_alarm_congestion(ConnMod, State, Reason); false -> ok end end, ?ALL_ALARM_REASONS ). -is_alarm_enabled(Channel) -> - Zone = emqx_channel:info(zone, Channel), - emqx_config:get_zone_conf(Zone, [conn_congestion, enable_alarm]). 
- -alarm_congestion(Socket, Transport, Channel, Reason) -> +alarm_congestion(ConnMod, State, Reason) -> case has_alarm_sent(Reason) of false -> - do_alarm_congestion(Socket, Transport, Channel, Reason); + do_alarm_congestion(ConnMod, State, Reason); true -> %% pretend we have sent an alarm again update_alarm_sent_at(Reason) end. -cancel_alarm_congestion(Socket, Transport, Channel, Reason) -> - Zone = emqx_channel:info(zone, Channel), - WontClearIn = emqx_config:get_zone_conf(Zone, [ - conn_congestion, - min_alarm_sustain_duration - ]), +cancel_alarm_congestion(ConnMod, State, Reason, Opts) -> + #{min_alarm_sustain_duration := WontClearIn} = Opts, case has_alarm_sent(Reason) andalso long_time_since_last_alarm(Reason, WontClearIn) of - true -> do_cancel_alarm_congestion(Socket, Transport, Channel, Reason); + true -> do_cancel_alarm_congestion(ConnMod, State, Reason); false -> ok end. -do_alarm_congestion(Socket, Transport, Channel, Reason) -> +do_alarm_congestion(ConnMod, State, Reason) -> ok = update_alarm_sent_at(Reason), - AlarmDetails = tcp_congestion_alarm_details(Socket, Transport, Channel), - Message = io_lib:format("connection congested: ~0p", [AlarmDetails]), - emqx_alarm:activate(?ALARM_CONN_CONGEST(Channel, Reason), AlarmDetails, Message), + Name = tcp_congestion_alarm_name(Reason, ConnMod, State), + Details = tcp_congestion_alarm_details(ConnMod, State), + Message = io_lib:format("connection congested: ~0p", [Details]), + emqx_alarm:activate(Name, Details, Message), ok. 
-do_cancel_alarm_congestion(Socket, Transport, Channel, Reason) -> +do_cancel_alarm_congestion(ConnMod, State, Reason) -> ok = remove_alarm_sent_at(Reason), - AlarmDetails = tcp_congestion_alarm_details(Socket, Transport, Channel), - Message = io_lib:format("connection congested: ~0p", [AlarmDetails]), - emqx_alarm:ensure_deactivated(?ALARM_CONN_CONGEST(Channel, Reason), AlarmDetails, Message), + Name = tcp_congestion_alarm_name(Reason, ConnMod, State), + Details = tcp_congestion_alarm_details(ConnMod, State), + Message = io_lib:format("connection congested: ~0p", [Details]), + emqx_alarm:ensure_deactivated(Name, Details, Message), ok. -is_tcp_congested(_Socket, esockd_socket) -> - %% TODO: No such concept in `socket`-based sockets. - false; -is_tcp_congested(Socket, Transport) -> - case Transport:getstat(Socket, [send_pend]) of - {ok, [{send_pend, N}]} when N > 0 -> true; - _ -> false +is_tcp_congested(ConnMod, State) -> + case ConnMod:sockstats([send_pend], State) of + [{send_pend, N}] -> + N > 0; + _ -> + false end. has_alarm_sent(Reason) -> @@ -119,17 +97,21 @@ has_alarm_sent(Reason) -> 0 -> false; _ -> true end. + update_alarm_sent_at(Reason) -> erlang:put(?ALARM_SENT(Reason), timenow()), ok. + remove_alarm_sent_at(Reason) -> erlang:erase(?ALARM_SENT(Reason)), ok. + get_alarm_sent_at(Reason) -> case erlang:get(?ALARM_SENT(Reason)) of undefined -> 0; LastSentAt -> LastSentAt end. 
+ long_time_since_last_alarm(Reason, WontClearIn) -> %% only sent clears when the alarm was not triggered in the last %% WontClearIn time @@ -144,25 +126,26 @@ timenow() -> %%============================================================================== %% Alarm message %%============================================================================== -tcp_congestion_alarm_details(Socket, Transport, Channel) -> + +tcp_congestion_alarm_name(Reason, ConnMod, State) -> + ClientId = ConnMod:info({channel, clientid}, State), + ClientInfo = ConnMod:info({channel, clientinfo}, State), + emqx_utils:format("~ts/~ts/~ts", [ + Reason, + ClientId, + maps:get(username, ClientInfo, <<"unknown_user">>) + ]). + +tcp_congestion_alarm_details(ConnMod, State) -> ProcInfo = process_info(self(), ?PROC_INFO_KEYS), BasicInfo = [{pid, list_to_binary(pid_to_list(self()))} | ProcInfo], - Stat = - case Transport:getstat(Socket, ?ALARM_SOCK_STATS_KEYS) of - {ok, Stat0} -> Stat0; - {error, _} -> [] - end, - Opts = - case Transport:getopts(Socket, ?ALARM_SOCK_OPTS_KEYS) of - {ok, Opts0} -> Opts0; - {error, _} -> [] - end, - SockInfo = Stat ++ Opts, - ConnInfo = [conn_info(Key, Channel) || Key <- ?ALARM_CONN_INFO_KEYS], - maps:from_list(BasicInfo ++ ConnInfo ++ SockInfo). - -conn_info(Key, Channel) when Key =:= sockname; Key =:= peername -> - {IPStr, Port} = emqx_channel:info(Key, Channel), - {Key, iolist_to_binary([inet:ntoa(IPStr), ":", integer_to_list(Port)])}; -conn_info(Key, Channel) -> - {Key, emqx_channel:info(Key, Channel)}. + Stat = ConnMod:sockstats(?ALARM_SOCK_STATS_KEYS, State), + Opts = ConnMod:sockopts(?ALARM_SOCK_OPTS_KEYS, State), + ConnInfo = [conn_info(Key, ConnMod, State) || Key <- ?ALARM_CONN_INFO_KEYS], + maps:from_list(BasicInfo ++ ConnInfo ++ Stat ++ Opts). 
+ +conn_info(Key, ConnMod, State) when Key =:= sockname; Key =:= peername -> + {Addr, Port} = ConnMod:info(Key, State), + {Key, iolist_to_binary([inet:ntoa(Addr), ":", integer_to_binary(Port)])}; +conn_info(Key, ConnMod, State) -> + {Key, ConnMod:info({channel, Key}, State)}. diff --git a/apps/emqx/src/emqx_connection.erl b/apps/emqx/src/emqx_connection.erl index f58cde38f4e..8c7c8998466 100644 --- a/apps/emqx/src/emqx_connection.erl +++ b/apps/emqx/src/emqx_connection.erl @@ -39,6 +39,12 @@ stats/1 ]). +%% `emqx_congestion` callbacks: +-export([ + sockstats/2, + sockopts/2 +]). + -export([ async_set_keepalive/3, async_set_keepalive/5, @@ -209,20 +215,28 @@ info({channel, Info}, #state{channel = Channel}) -> -spec stats(pid() | state()) -> emqx_types:stats(). stats(CPid) when is_pid(CPid) -> call(CPid, stats); -stats(#state{ - transport = Transport, - socket = Socket, - channel = Channel -}) -> - SockStats = - case Transport:getstat(Socket, ?SOCK_STATS) of - {ok, Ss} -> Ss; - {error, _} -> [] - end, +stats(State = #state{channel = Channel}) -> + SockStats = sockstats(?SOCK_STATS, State), ChanStats = emqx_channel:stats(Channel), ProcStats = emqx_utils:proc_stats(), lists:append([SockStats, ChanStats, ProcStats]). +%% @doc Gather socket statistics, for `emqx_congestion` alarms. +-spec sockstats([atom()], state()) -> emqx_types:stats(). +sockstats(Keys, #state{socket = Socket, transport = Transport}) -> + case Transport:getstat(Socket, Keys) of + {ok, Ss} -> Ss; + {error, _} -> [] + end. + +%% @doc Gather socket options, for `emqx_congestion` alarms. +-spec sockopts([atom()], state()) -> emqx_types:stats(). +sockopts(Names, #state{socket = Socket, transport = Transport}) -> + case Transport:getopts(Socket, Names) of + {ok, Opts} -> Opts; + {error, _} -> [] + end. + %% @doc Set TCP keepalive socket options to override system defaults. %% Idle: The number of seconds a connection needs to be idle before %% TCP begins sending out keep-alive probes (Linux default 7200). 
@@ -597,14 +611,12 @@ handle_msg(Msg, State) -> terminate( Reason, State = #state{ - channel = Channel, - transport = Transport, - socket = Socket + channel = Channel } ) -> try Channel1 = emqx_channel:set_conn_state(disconnected, Channel), - emqx_congestion:cancel_alarms(Socket, Transport, Channel1), + emqx_congestion:cancel_alarms(?MODULE, State), emqx_channel:terminate(Reason, Channel1), close_socket_ok(State), ?TRACE("SOCKET", "emqx_connection_terminated", #{reason => Reason}) @@ -680,14 +692,12 @@ handle_timeout( _TRef, emit_stats, State = #state{ - channel = Channel, - transport = Transport, - socket = Socket + channel = Channel } ) -> ClientId = emqx_channel:info(clientid, Channel), emqx_cm:set_chan_stats(ClientId, stats(State)), - emqx_congestion:maybe_alarm_conn_congestion(Socket, Transport, Channel), + emqx_congestion:maybe_alarm_conn_congestion(?MODULE, State), {ok, State#state{stats_timer = undefined}}; handle_timeout( TRef, diff --git a/apps/emqx/src/emqx_socket_connection.erl b/apps/emqx/src/emqx_socket_connection.erl index f9b370fd2fc..22ee13477b1 100644 --- a/apps/emqx/src/emqx_socket_connection.erl +++ b/apps/emqx/src/emqx_socket_connection.erl @@ -34,6 +34,12 @@ stats/1 ]). +%% `emqx_congestion` callbacks: +-export([ + sockstats/2, + sockopts/2 +]). + -export([ async_set_keepalive/4, async_set_socket_options/2 @@ -168,9 +174,17 @@ info({channel, Info}, #state{channel = Channel}) -> -spec stats(pid() | state()) -> emqx_types:stats(). stats(CPid) when is_pid(CPid) -> call(CPid, stats); -stats(#state{socket = Socket, channel = Channel}) -> +stats(State = #state{channel = Channel}) -> + SockStats = sockstats(?SOCK_STATS, State), + ChanStats = emqx_channel:stats(Channel), + ProcStats = emqx_utils:proc_stats(), + lists:append([SockStats, ChanStats, ProcStats]). + +%% @doc Gather socket statistics, for `emqx_congestion` alarms. +-spec sockstats([atom()], state()) -> emqx_types:stats(). 
+sockstats(Keys, #state{socket = Socket}) -> #{counters := Counters} = socket:info(Socket), - SockStats = lists:map( + lists:map( fun (S = recv_oct) -> {S, maps:get(read_byte, Counters, 0)}; (S = recv_cnt) -> {S, maps:get(read_pkg, Counters, 0)}; @@ -178,11 +192,25 @@ stats(#state{socket = Socket, channel = Channel}) -> (S = send_cnt) -> {S, maps:get(write_pkg, Counters, 0)}; (S = send_pend) -> {S, 0} end, - ?SOCK_STATS - ), - ChanStats = emqx_channel:stats(Channel), - ProcStats = emqx_utils:proc_stats(), - lists:append([SockStats, ChanStats, ProcStats]). + Keys + ). + +%% @doc Gather socket options, for `emqx_congestion` alarms. +-spec sockopts([atom()], state()) -> emqx_types:stats(). +sockopts(Names, #state{socket = Socket}) -> + emqx_utils:flattermap( + fun + (buffer = N) -> sockopt_val(N, socket:getopt(Socket, {otp, rcvbuf})); + (recbuf = N) -> sockopt_val(N, socket:getopt(Socket, {socket, rcvbuf})); + (sndbuf = N) -> sockopt_val(N, socket:getopt(Socket, {socket, sndbuf})); + (high_watermark) -> _NA = []; + (high_msgq_watermark) -> _NA = [] + end, + Names + ). + +sockopt_val(Name, {ok, V}) -> {Name, V}; +sockopt_val(_, {error, _}) -> []. %% @doc Set TCP keepalive socket options to override system defaults. %% Idle: The number of seconds a connection needs to be idle before @@ -566,14 +594,11 @@ handle_msg(Msg, State) -> -spec terminate(any(), state()) -> no_return(). 
terminate( Reason, - State = #state{ - channel = Channel, - socket = Socket - } + State = #state{channel = Channel} ) -> try Channel1 = emqx_channel:set_conn_state(disconnected, Channel), - emqx_congestion:cancel_alarms(Socket, esockd_socket, Channel1), + emqx_congestion:cancel_alarms(?MODULE, State), emqx_channel:terminate(Reason, Channel1), close_socket_ok(State), ?TRACE("SOCKET", "emqx_connection_terminated", #{reason => Reason}) @@ -649,13 +674,12 @@ handle_timeout( _TRef, emit_stats, State = #state{ - channel = Channel, - socket = Socket + channel = Channel } ) -> ClientId = emqx_channel:info(clientid, Channel), emqx_cm:set_chan_stats(ClientId, stats(State)), - emqx_congestion:maybe_alarm_conn_congestion(Socket, esockd_socket, Channel), + emqx_congestion:maybe_alarm_conn_congestion(?MODULE, State), {ok, State#state{stats_timer = undefined}}; handle_timeout( TRef, diff --git a/apps/emqx/test/emqx_connection_SUITE.erl b/apps/emqx/test/emqx_connection_SUITE.erl index e86ecde5e94..dbb1b5b4f45 100644 --- a/apps/emqx/test/emqx_connection_SUITE.erl +++ b/apps/emqx/test/emqx_connection_SUITE.erl @@ -484,18 +484,14 @@ t_cancel_congestion_alarm(_) -> ), with_conn( fun(Pid) -> - #{ - channel := Channel, - transport := Transport, - socket := Socket - } = emqx_connection:get_state(Pid), + State = sys:get_state(Pid), %% precondition - Zone = emqx_channel:info(zone, Channel), + Zone = emqx_connection:info({channel, zone}, State), true = emqx_config:get_zone_conf(Zone, [conn_congestion, enable_alarm]), %% should not raise errors - ok = emqx_congestion:maybe_alarm_conn_congestion(Socket, Transport, Channel), + ok = emqx_congestion:maybe_alarm_conn_congestion(emqx_connection, State), %% should not raise errors either - ok = emqx_congestion:cancel_alarms(Socket, Transport, Channel), + ok = emqx_congestion:cancel_alarms(emqx_connection, State), ok end, Opts From 280ac5a0f7ef916a44e594536ab96141c265f87f Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 2 Jul 2025 20:32:31 
+0200 Subject: [PATCH 10/31] feat(socket): use async send APIs and respect `send_timeout` option Behavior is slightly different from `emqx_connection`: there's no equivalent to `high_watermark`, so connection is never blocked on send. --- apps/emqx/src/emqx_socket_connection.erl | 173 +++++++++++++++++------ 1 file changed, 128 insertions(+), 45 deletions(-) diff --git a/apps/emqx/src/emqx_socket_connection.erl b/apps/emqx/src/emqx_socket_connection.erl index 22ee13477b1..35d96cf2c11 100644 --- a/apps/emqx/src/emqx_socket_connection.erl +++ b/apps/emqx/src/emqx_socket_connection.erl @@ -77,7 +77,7 @@ %% TCP/TLS Socket socket :: socket:socket(), %% Sock State - sockstate :: emqx_types:sockstate(), + sockstate :: idle | closed | congested(), %% Packet parser / serializer parser :: parser(), serialize :: emqx_frame:serialize_opts(), @@ -103,6 +103,14 @@ extra = [] }). +-record(congested, { + handle :: reference(), + deadline :: _TimestampMs :: integer(), + sendq :: [erlang:iovec()] +}). + +-type congested() :: #congested{}. + -type gc_tracker() :: { ActiveN :: non_neg_integer(), @@ -182,7 +190,7 @@ stats(State = #state{channel = Channel}) -> %% @doc Gather socket statistics, for `emqx_congestion` alarms. -spec sockstats([atom()], state()) -> emqx_types:stats(). -sockstats(Keys, #state{socket = Socket}) -> +sockstats(Keys, #state{socket = Socket, sockstate = SS}) -> #{counters := Counters} = socket:info(Socket), lists:map( fun @@ -190,7 +198,7 @@ sockstats(Keys, #state{socket = Socket}) -> (S = recv_cnt) -> {S, maps:get(read_pkg, Counters, 0)}; (S = send_oct) -> {S, maps:get(write_byte, Counters, 0)}; (S = send_cnt) -> {S, maps:get(write_pkg, Counters, 0)}; - (S = send_pend) -> {S, 0} + (S = send_pend) -> {S, sendq_bytesize(SS)} end, Keys ). 
@@ -526,18 +534,14 @@ handle_msg({'$gen_call', From, Req}, State) -> handle_msg({'$gen_cast', Req}, State) -> NewState = handle_cast(Req, State), {ok, NewState}; -handle_msg({'$socket', Socket, select, _Handle}, State) -> - case sock_async_recv(Socket, 0) of - {ok, Data} -> - handle_data(Data, true, State); - {error, {closed, Data}} -> - {ok, [{recv, Data}, {sock_closed, tcp_closed}], socket_closed(State)}; - {error, closed} -> - handle_info({sock_closed, tcp_closed}, socket_closed(State)); - {error, {Reason, Data}} -> - {ok, [{recv, Data}, {sock_error, Reason}], State}; - {error, Reason} -> - handle_info({sock_error, Reason}, State) +handle_msg({'$socket', Socket, select, Handle}, State = #state{sockstate = SS}) -> + case SS of + idle -> + handle_data_ready(Socket, State); + #congested{handle = Handle} -> + handle_send_ready(Socket, SS, State); + _ -> + handle_data_ready(Socket, State) end; handle_msg({'$socket', _Socket, abort, {_Handle, Reason}}, State) -> handle_info({sock_error, Reason}, State); @@ -674,13 +678,14 @@ handle_timeout( _TRef, emit_stats, State = #state{ - channel = Channel + channel = Channel, + sockstate = SS } ) -> ClientId = emqx_channel:info(clientid, Channel), emqx_cm:set_chan_stats(ClientId, stats(State)), emqx_congestion:maybe_alarm_conn_congestion(?MODULE, State), - {ok, State#state{stats_timer = undefined}}; + check_send_timeout(SS, State#state{stats_timer = undefined}); handle_timeout( TRef, keepalive, @@ -738,6 +743,7 @@ handle_data( -compile({inline, [request_more_data/4]}). request_more_data(Socket, More, Acc, State) -> + %% TODO: `{otp, select_read}`. case sock_async_recv(Socket, More) of {ok, DataMore} -> {ok, [Acc, {recv_more, DataMore}], State}; @@ -757,6 +763,21 @@ request_more_data(Socket, More, Acc, State) -> {ok, [Acc, {sock_error, Reason}], State} end. +-compile({inline, [handle_data_ready/2]}). 
+handle_data_ready(Socket, State) -> + case sock_async_recv(Socket, 0) of + {ok, Data} -> + handle_data(Data, true, State); + {error, {closed, Data}} -> + {ok, [{recv, Data}, {sock_closed, tcp_closed}], socket_closed(State)}; + {error, closed} -> + handle_info({sock_closed, tcp_closed}, socket_closed(State)); + {error, {Reason, Data}} -> + {ok, [{recv, Data}, {sock_error, Reason}], State}; + {error, Reason} -> + handle_info({sock_error, Reason}, State) + end. + %% @doc: return a reversed Msg list -compile({inline, [next_incoming_msgs/1]}). next_incoming_msgs([Packet]) -> @@ -913,21 +934,47 @@ serialize_and_inc_stats(#state{serialize = Serialize}, Packet) -> %% Send data -spec send(non_neg_integer(), iodata(), state()) -> {ok, state()}. -send(Num, IoData, #state{socket = Socket, sockstate = SS} = State) -> +send(Num, IoData, #state{socket = Socket, sockstate = idle} = State) -> Oct = iolist_size(IoData), - emqx_metrics:inc('bytes.sent', Oct), - %% FIXME timeout - case SS =/= closed andalso socket:send(Socket, IoData, 15_000) of + Handle = make_ref(), + case socket:send(Socket, IoData, Handle) of + ok -> + sent(Num, Oct, State); + {select, {_Info, Rest}} -> + sent(Num, Oct, queue_send(Handle, Rest, State)); + {select, _Info} -> + sent(Num, Oct, queue_send(Handle, IoData, State)); + {error, {Reason, _Rest}} -> + %% Defer error handling: + {ok, {sock_error, Reason}, State}; + {error, Reason} -> + {ok, {sock_error, Reason}, State} + end; +send(Num, IoData, #state{sockstate = SS = #congested{sendq = SQ, deadline = Deadline}} = State) -> + case erlang:monotonic_time(millisecond) of + BeforeDeadline when BeforeDeadline < Deadline -> + NState = State#state{sockstate = SS#congested{sendq = [IoData | SQ]}}, + sent(Num, iolist_size(IoData), NState); + _PastDeadline -> + {ok, {sock_error, send_timeout}, State} + end; +send(_Num, _IoVec, #state{sockstate = closed} = State) -> + {ok, State}. + +-compile({inline, [handle_send_ready/3]}). 
+handle_send_ready(Socket, SS = #congested{sendq = SQ}, State) -> + IoData = sendq_to_iodata(SQ, []), + Handle = make_ref(), + case socket:send(Socket, IoData, Handle) of ok -> - Ok = sent(Num, Oct, State), - ?BROKER_INSTR_WMARK(t0_deliver, {T0, TDeliver} when is_integer(T0), begin - TSent = ?BROKER_INSTR_TS(), - ?BROKER_INSTR_OBSERVE_HIST(connection, deliver_delay_us, ?US(TDeliver - T0)), - ?BROKER_INSTR_OBSERVE_HIST(connection, deliver_total_lat_us, ?US(TSent - T0)) - end), - Ok; - false -> {ok, State}; + {select, {_Info, Rest}} -> + %% Partially accepted, renew deadline. + {ok, queue_send(Handle, Rest, State)}; + {select, _Info} -> + %% Totally congested, keep the deadline. + NState = State#state{sockstate = SS#congested{sendq = IoData}}, + {ok, NState}; {error, {Reason, _Rest}} -> %% Defer error handling: {ok, {sock_error, Reason}, State}; @@ -935,27 +982,63 @@ send(Num, IoData, #state{socket = Socket, sockstate = SS} = State) -> {ok, {sock_error, Reason}, State} end. +queue_send(Handle, IoData, State = #state{sockstate = idle, listener = {Type, Name}}) -> + Timeout = emqx_config:get_listener_conf(Type, Name, [tcp_options, send_timeout], 15_000), + Deadline = erlang:monotonic_time(millisecond) + Timeout, + SockState = #congested{handle = Handle, deadline = Deadline, sendq = [IoData]}, + State#state{sockstate = SockState}. + +check_send_timeout(#congested{deadline = Deadline}, State) -> + case erlang:monotonic_time(millisecond) of + BeforeDeadline when BeforeDeadline < Deadline -> + {ok, State}; + _PastDeadline -> + {ok, {sock_error, send_timeout}, State} + end; +check_send_timeout(_, State) -> + {ok, State}. + +sendq_to_iodata([IoData | Rest], Acc) -> + sendq_to_iodata(Rest, [IoData | Acc]); +sendq_to_iodata([], Acc) -> + Acc. + +sendq_bytesize(#congested{sendq = SQ}) -> + erlang:iolist_size(SQ); +sendq_bytesize(_) -> + 0. 
+ %% Some bytes sent -sent(_Num, _Oct, State = #state{gc_tracker = {ActiveN, {PktsIn, BytesIn}, _Out}}) when - PktsIn > ActiveN --> - %% Run GC and check OOM after certain amount of messages or bytes received. - trigger_gc(PktsIn, BytesIn, ActiveN, State); -sent(Num, Oct, State = #state{gc_tracker = {ActiveN, In, {PktsOut, BytesOut}}}) -> - %% Run GC and check OOM after certain amount of messages or bytes sent. - NBytes = BytesOut + Oct, - case PktsOut + Num of - NPkts when NPkts > ActiveN -> - trigger_gc(NPkts, NBytes, ActiveN, State); - NPkts -> - NState = State#state{gc_tracker = {ActiveN, In, {NPkts, NBytes}}}, - {ok, NState} - end. +sent( + Num, + Oct, + State = #state{gc_tracker = {ActiveN, In = {PktsIn, BytesIn}, {PktsOut, BytesOut}}} +) -> + %% TODO: Not actually "sent", as is `emqx_metrics:inc_sent/1`. + emqx_metrics:inc('bytes.sent', Oct), + NPktsOut = PktsOut + Num, + NBytesOut = BytesOut + Oct, + if + PktsIn > ActiveN -> + %% Run GC and check OOM after certain amount of messages or bytes received. + NState = trigger_gc(PktsIn, BytesIn, ActiveN, State); + NPktsOut > ActiveN -> + %% Run GC and check OOM after certain amount of messages or bytes sent. + NState = trigger_gc(NPktsOut, NBytesOut, ActiveN, State); + true -> + NState = State#state{gc_tracker = {ActiveN, In, {NPktsOut, NBytesOut}}} + end, + ?BROKER_INSTR_WMARK(t0_deliver, {T0, TDeliver} when is_integer(T0), begin + TSent = ?BROKER_INSTR_TS(), + ?BROKER_INSTR_OBSERVE_HIST(connection, deliver_delay_us, ?US(TDeliver - T0)), + ?BROKER_INSTR_OBSERVE_HIST(connection, deliver_total_lat_us, ?US(TSent - T0)) + end), + {ok, NState}. -compile({inline, [trigger_gc/4]}). trigger_gc(NPkts, NBytes, ActiveN, State) -> NState = State#state{gc_tracker = init_gc_tracker(ActiveN)}, - {ok, check_oom(NPkts, NBytes, run_gc(NPkts, NBytes, NState))}. + check_oom(NPkts, NBytes, run_gc(NPkts, NBytes, NState)). 
%%-------------------------------------------------------------------- %% Handle Info From a43ee50757f21d08cfa8015d491162fc65ed5952 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 2 Jul 2025 20:39:47 +0200 Subject: [PATCH 11/31] chore(conn): make `esockd_transport` connection emit `send_timeout` error Effectively mirroring `emqx_socket_connection` behavior. --- apps/emqx/src/emqx_connection.erl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/apps/emqx/src/emqx_connection.erl b/apps/emqx/src/emqx_connection.erl index 8c7c8998466..b888648df32 100644 --- a/apps/emqx/src/emqx_connection.erl +++ b/apps/emqx/src/emqx_connection.erl @@ -946,9 +946,11 @@ send(Num, IoData, #state{transport = Transport, socket = Socket} = State) -> ?BROKER_INSTR_OBSERVE_HIST(connection, deliver_total_lat_us, ?US(TSent - T0)) end), Ok; - {error, Reason} -> + {error, timeout} -> %% Defer error handling %% so it's handled the same as tcp_closed or ssl_closed + {ok, {sock_error, send_timeout}, State}; + {error, Reason} -> {ok, {sock_error, Reason}, State} end. From 63f64ec23c9cb7c680b969ace0a7bfae46701925 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 2 Jul 2025 20:41:39 +0200 Subject: [PATCH 12/31] test(conn): verify congestion and send timeout logic --- apps/emqx/test/emqx_client_SUITE.erl | 139 ++++++++++++++++++++------- 1 file changed, 105 insertions(+), 34 deletions(-) diff --git a/apps/emqx/test/emqx_client_SUITE.erl b/apps/emqx/test/emqx_client_SUITE.erl index 6949194e8c8..ef5cb289cd2 100644 --- a/apps/emqx/test/emqx_client_SUITE.erl +++ b/apps/emqx/test/emqx_client_SUITE.erl @@ -21,7 +21,8 @@ all() -> {group, mqttv3}, {group, mqttv4}, {group, mqttv5}, - {group, others} + {group, others}, + {group, misbehaving} ]. 
groups() -> @@ -36,8 +37,7 @@ groups() -> t_overlapping_subscriptions, %% t_keepalive, t_redelivery_on_reconnect, - t_dollar_topics, - t_sub_non_utf8_topic + t_dollar_topics ]}, {mqttv5, [], [t_basic_with_props_v5, t_v5_receive_maximim_in_connack]}, {others, [], [ @@ -54,12 +54,25 @@ groups() -> t_clientid_override, t_clientid_override_fail_with_empty_render_result, t_clientid_override_fail_with_expression_exception + ]}, + {misbehaving, [], [ + t_sub_non_utf8_topic, + t_congestion_send_timeout ]} ]. init_per_suite(Config) -> Apps = emqx_cth_suite:start( - [{emqx, "listeners.ssl.default.ssl_options.verify = verify_peer"}], + [ + {emqx, + %% t_congestion_send_timeout + "listeners.tcp.default.tcp_options.send_timeout = 2500\n" + "listeners.tcp.default.tcp_options.sndbuf = 4KB\n" + "listeners.tcp.default.tcp_options.recbuf = 4KB\n" + "listeners.tcp.default.tcp_options.high_watermark = 160KB\n" + %% others + "listeners.ssl.default.ssl_options.verify = verify_peer\n"} + ], #{work_dir => emqx_cth_suite:work_dir(Config)} ), [{apps, Apps} | Config]. @@ -254,36 +267,6 @@ t_dollar_topics(_) -> ?assertEqual(0, length(recv_msgs(1))), ok = emqtt:disconnect(C). 
-t_sub_non_utf8_topic(_) -> - {ok, Socket} = gen_tcp:connect({127, 0, 0, 1}, 1883, [{active, true}, binary]), - ConnPacket = emqx_frame:serialize(#mqtt_packet{ - header = #mqtt_packet_header{type = 1}, - variable = #mqtt_packet_connect{ - clientid = <<"abcdefg">> - } - }), - ok = gen_tcp:send(Socket, ConnPacket), - receive - {tcp, _, _ConnAck = <<32, 2, 0, 0>>} -> ok - after 3000 -> ct:fail({connect_ack_not_recv, process_info(self(), messages)}) - end, - SubHeader = <<130, 18, 25, 178>>, - SubTopicLen = <<0, 13>>, - %% this is not a valid utf8 topic - SubTopic = <<128, 10, 10, 12, 178, 159, 162, 47, 115, 1, 1, 1, 1>>, - SubQoS = <<1>>, - SubPacket = <>, - ok = gen_tcp:send(Socket, SubPacket), - receive - {tcp_closed, _} -> ok - after 3000 -> ct:fail({should_get_disconnected, process_info(self(), messages)}) - end, - timer:sleep(1000), - ListenerCounts = emqx_listeners:shutdown_count('tcp:default', {{0, 0, 0, 0}, 1883}), - TopicInvalidCount = proplists:get_value(topic_filter_invalid, ListenerCounts), - ?assert(is_integer(TopicInvalidCount) andalso TopicInvalidCount > 0), - ok. - %%-------------------------------------------------------------------- %% Test cases for MQTT v5 %%-------------------------------------------------------------------- @@ -516,6 +499,94 @@ on_hook(_ClientInfo, ConnInfo, 'client.connected' = HP, Pid) -> _ = Pid ! {HP, ConnInfo}, ok. 
+%%-------------------------------------------------------------------- +%% Misbehaving clients +%%-------------------------------------------------------------------- + +t_sub_non_utf8_topic(_) -> + {ok, Socket} = gen_tcp:connect({127, 0, 0, 1}, 1883, [{active, true}, binary]), + ConnPacket = ?CONNECT_PACKET(#mqtt_packet_connect{clientid = <<"abcdefg">>}), + ok = gen_tcp:send(Socket, emqx_frame:serialize(ConnPacket)), + receive + {tcp, _, _ConnAck = <<32, 2, 0, 0>>} -> ok + after 3000 -> ct:fail({connect_ack_not_recv, process_info(self(), messages)}) + end, + SubHeader = <<130, 18, 25, 178>>, + SubTopicLen = <<0, 13>>, + %% this is not a valid utf8 topic + SubTopic = <<128, 10, 10, 12, 178, 159, 162, 47, 115, 1, 1, 1, 1>>, + SubQoS = <<1>>, + SubPacket = <>, + ok = gen_tcp:send(Socket, SubPacket), + receive + {tcp_closed, _} -> ok + after 3000 -> ct:fail({should_get_disconnected, process_info(self(), messages)}) + end, + timer:sleep(1000), + ListenerCounts = emqx_listeners:shutdown_count('tcp:default', 1883), + TopicInvalidCount = proplists:get_value(topic_filter_invalid, ListenerCounts), + ?assert(is_integer(TopicInvalidCount) andalso TopicInvalidCount > 0), + ok. 
+ +t_congestion_send_timeout(_) -> + ok = emqx_config:put_zone_conf(default, [mqtt, idle_timeout], 1000), + {ok, Socket} = gen_tcp:connect({127, 0, 0, 1}, 1883, [{active, false}, binary]), + %% Send manually constructed CONNECT: + ok = gen_tcp:send( + Socket, + emqx_frame:serialize( + ?CONNECT_PACKET(#mqtt_packet_connect{clientid = <<"t_congestion_send_timeout">>}) + ) + ), + {ok, Frames1} = gen_tcp:recv(Socket, 0, 1000), + {Pkt1, <<>>, Parser1} = emqx_frame:parse(Frames1, emqx_frame:initial_parse_state()), + ?assertMatch(?CONNACK_PACKET(0), Pkt1), + %% Send manually constructed SUBSCRIBE to subscribe to "t": + Topic = <<"t">>, + ok = gen_tcp:send( + Socket, + emqx_frame:serialize( + ?SUBSCRIBE_PACKET(1, [{Topic, #{rh => 0, rap => 0, nl => 0, qos => 0}}]) + ) + ), + {ok, Frames2} = gen_tcp:recv(Socket, 0, 1000), + {Pkt2, <<>>, _Parser2} = emqx_frame:parse(Frames2, Parser1), + ?assertMatch(?SUBACK_PACKET(1, [0]), Pkt2), + %% Subscribe to alarms: + AlarmTopic = <<"$SYS/brokers/+/alarms/activate">>, + ok = emqx_broker:subscribe(AlarmTopic), + %% Start filling up send buffers: + Publisher = fun Publisher(N) -> + %% Each message has 8000 bytes payload: + Payload = binary:copy(<>, 1000), + _ = emqx:publish(emqx_message:make(<<"publisher">>, Topic, Payload)), + ok = timer:sleep(50), + Publisher(N + 1) + end, + _PublisherPid = spawn_link(fun() -> Publisher(1) end), + %% Start lagging consumer: + Consumer = fun Consumer() -> + case gen_tcp:recv(Socket, 1000, 1000) of + {ok, _Bytes} -> + ok = timer:sleep(50), + Consumer(); + {error, closed} -> + closed + end + end, + _ConsumerPid = spawn_link(fun() -> Consumer() end), + %% Congestion alarm should be raised soon: + {deliver, _, AlarmMsg} = ?assertReceive({deliver, AlarmTopic, _AlarmMsg}, 5_000), + #{ + <<"name">> := <<"conn_congestion/t_congestion_send_timeout/undefined">>, + <<"details">> := AlarmDetails + } = emqx_utils_json:decode(emqx_message:payload(AlarmMsg)), + %% Connection should be closed once send timeout passes. 
+ ConnPid = list_to_pid(binary_to_list(maps:get(<<"pid">>, AlarmDetails))), + MRef = erlang:monitor(process, ConnPid), + ?assertReceive({'DOWN', MRef, process, ConnPid, {shutdown, send_timeout}}, 5_000), + ok = gen_tcp:close(Socket). + %%-------------------------------------------------------------------- %% Helper functions %%-------------------------------------------------------------------- From e1312b6d324a58eab6de370b6ece1c43f46c2ffb Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Mon, 30 Jun 2025 18:13:50 +0200 Subject: [PATCH 13/31] feat(emqx): support `esockd_socket` backend through listener config --- apps/emqx/src/emqx_listeners.erl | 44 +++- apps/emqx/src/emqx_schema.erl | 8 + apps/emqx/test/emqx_client_SUITE.erl | 190 ++++++++++-------- apps/emqx/test/emqx_listeners_SUITE.erl | 16 ++ .../emqx/test/emqx_mqtt_protocol_v5_SUITE.erl | 19 +- rel/i18n/emqx_schema.hocon | 11 + 6 files changed, 196 insertions(+), 92 deletions(-) diff --git a/apps/emqx/src/emqx_listeners.erl b/apps/emqx/src/emqx_listeners.erl index c315e48aa27..987a363f141 100644 --- a/apps/emqx/src/emqx_listeners.erl +++ b/apps/emqx/src/emqx_listeners.erl @@ -426,11 +426,17 @@ console_print(_Fmt, _Args) -> ok. -spec do_start_listener(listener_type(), atom(), listener_id(), map()) -> {ok, pid() | {skipped, atom()}} | {error, term()}. %% Start MQTT/TCP listener +do_start_listener(Type = tcp, Name, Id, #{bind := ListenOn, tcp_backend := socket} = Conf) -> + esockd:open_tcpsocket( + Id, + ListenOn, + esockd_opts(Id, Type, Name, emqx_socket_connection, Conf) + ); do_start_listener(Type, Name, Id, #{bind := ListenOn} = Opts) when ?ESOCKD_LISTENER(Type) -> esockd:open( Id, ListenOn, - esockd_opts(Id, Type, Name, Opts) + esockd_opts(Id, Type, Name, emqx_connection, Opts) ); %% Start MQTT/WS listener do_start_listener(Type, Name, Id, Opts) when ?COWBOY_LISTENER(Type) -> @@ -454,13 +460,35 @@ do_start_listener(quic, Name, Id, #{bind := Bind} = Opts) -> {ok, {skipped, quic_app_missing}} end. 
+do_update_listener( + Type = tcp, Name, OldConf, NewConf = #{bind := ListenOn, tcp_backend := Backend} +) -> + Id = listener_id(tcp, Name), + case OldConf of + #{bind := ListenOn, tcp_backend := Backend} -> + case Backend of + gen_tcp -> ConnMod = emqx_connection; + socket -> ConnMod = emqx_socket_connection + end, + esockd:reset_options( + {Id, ListenOn}, + esockd_opts(Id, Type, Name, ConnMod, NewConf) + ); + _Different -> + %% TODO + %% Again, we're not strictly required to drop live connections in this case. + {error, not_supported} + end; do_update_listener(Type, Name, OldConf, NewConf = #{bind := ListenOn}) when ?ESOCKD_LISTENER(Type) -> Id = listener_id(Type, Name), - case maps:get(bind, OldConf) of - ListenOn -> - esockd:reset_options({Id, ListenOn}, esockd_opts(Id, Type, Name, NewConf)); + case OldConf of + #{bind := ListenOn} -> + esockd:reset_options( + {Id, ListenOn}, + esockd_opts(Id, Type, Name, emqx_connection, NewConf) + ); _Different -> %% TODO %% Again, we're not strictly required to drop live connections in this case. @@ -578,17 +606,17 @@ perform_listener_change(update, {{Type, Name, ConfOld}, {_, _, ConfNew}}) -> perform_listener_change(stop, {Type, Name, Conf}) -> stop_listener(Type, Name, Conf). 
-esockd_opts(ListenerId, Type, Name, Opts0) -> +esockd_opts(ListenerId, Type, Name, ConnMod, Opts0) -> Zone = zone(Opts0), PacketTcpOpts = choose_packet_opts(Opts0), + Limiter = emqx_limiter:create_esockd_limiter_client(Zone, ListenerId), Opts1 = maps:with([acceptors, max_connections, proxy_protocol, proxy_protocol_timeout], Opts0), - ESockdLimiter = emqx_limiter:create_esockd_limiter_client(Zone, ListenerId), Opts2 = Opts1#{ - limiter => ESockdLimiter, + limiter => Limiter, access_rules => esockd_access_rules(maps:get(access_rules, Opts0, [])), tune_fun => {emqx_olp, backoff_new_conn, [Zone]}, connection_mfargs => - {emqx_connection, start_link, [ + {ConnMod, start_link, [ #{ listener => {Type, Name}, zone => Zone, diff --git a/apps/emqx/src/emqx_schema.erl b/apps/emqx/src/emqx_schema.erl index 17faaf64413..4c5acc0f251 100644 --- a/apps/emqx/src/emqx_schema.erl +++ b/apps/emqx/src/emqx_schema.erl @@ -627,6 +627,14 @@ fields("crl_cache") -> ]; fields("mqtt_tcp_listener") -> mqtt_listener(1883) ++ + [ + {"tcp_backend", + sc(hoconsc:enum([gen_tcp, socket]), #{ + default => <<"gen_tcp">>, + desc => ?DESC(fields_mqtt_opts_tcp_backend), + importance => ?IMPORTANCE_LOW + })} + ] ++ mqtt_parse_options() ++ [ {"tcp_options", diff --git a/apps/emqx/test/emqx_client_SUITE.erl b/apps/emqx/test/emqx_client_SUITE.erl index ef5cb289cd2..11f50c5d77a 100644 --- a/apps/emqx/test/emqx_client_SUITE.erl +++ b/apps/emqx/test/emqx_client_SUITE.erl @@ -18,18 +18,30 @@ all() -> [ - {group, mqttv3}, - {group, mqttv4}, - {group, mqttv5}, - {group, others}, - {group, misbehaving} + {group, gen_tcp_listener}, + {group, socket_listener} ]. 
groups() -> [ - {mqttv3, [], [t_basic_v3]}, + {gen_tcp_listener, [], [ + {group, mqttv3}, + {group, mqttv4}, + {group, mqttv5}, + {group, others}, + {group, misbehaving} + ]}, + {socket_listener, [], [ + {group, sock_closed}, + {group, misbehaving} + ]}, + {mqttv3, [], [ + t_basic, + t_sock_closed_reason_normal, + t_sock_closed_force_closed_by_client + ]}, {mqttv4, [], [ - t_basic_v4, + t_basic, t_cm, t_cm_registry, %% t_will_message, @@ -37,16 +49,21 @@ groups() -> t_overlapping_subscriptions, %% t_keepalive, t_redelivery_on_reconnect, - t_dollar_topics + t_dollar_topics, + t_sock_closed_reason_normal, + t_sock_closed_force_closed_by_client + ]}, + {mqttv5, [], [ + t_basic_with_props_v5, + t_v5_receive_maximim_in_connack, + t_sock_closed_reason_normal, + t_sock_closed_force_closed_by_client ]}, - {mqttv5, [], [t_basic_with_props_v5, t_v5_receive_maximim_in_connack]}, {others, [], [ t_username_as_clientid, t_certcn_as_alias, t_certdn_as_alias, t_client_attr_from_user_property, - t_sock_closed_reason_normal, - t_sock_closed_force_closed_by_client, t_certcn_as_clientid_default_config_tls, t_certcn_as_clientid_tlsv1_3, t_certcn_as_clientid_tlsv1_2, @@ -58,14 +75,25 @@ groups() -> {misbehaving, [], [ t_sub_non_utf8_topic, t_congestion_send_timeout + ]}, + {sock_closed, [], [ + t_sock_closed_reason_normal, + t_sock_closed_force_closed_by_client ]} ]. init_per_suite(Config) -> + Config. + +end_per_suite(_Config) -> + ok. + +init_per_group(gen_tcp_listener, Config) -> Apps = emqx_cth_suite:start( [ {emqx, %% t_congestion_send_timeout + "listeners.tcp.default.tcp_backend = gen_tcp\n" "listeners.tcp.default.tcp_options.send_timeout = 2500\n" "listeners.tcp.default.tcp_options.sndbuf = 4KB\n" "listeners.tcp.default.tcp_options.recbuf = 4KB\n" @@ -75,10 +103,37 @@ init_per_suite(Config) -> ], #{work_dir => emqx_cth_suite:work_dir(Config)} ), - [{apps, Apps} | Config]. 
+ [{group_apps, Apps} | Config]; +init_per_group(socket_listener, Config) -> + Apps = emqx_cth_suite:start( + [ + {emqx, + %% t_congestion_send_timeout + "listeners.tcp.default.tcp_backend = socket\n" + "listeners.tcp.default.tcp_options.send_timeout = 2500\n" + "listeners.tcp.default.tcp_options.sndbuf = 4KB\n" + "listeners.tcp.default.tcp_options.recbuf = 4KB\n" + %% others + "listeners.ssl.default.ssl_options.verify = verify_peer\n"} + ], + #{work_dir => emqx_cth_suite:work_dir(Config)} + ), + [{group_apps, Apps} | Config]; +init_per_group(mqttv3, Config) -> + [{proto_ver, v3} | Config]; +init_per_group(mqttv4, Config) -> + [{proto_ver, v4} | Config]; +init_per_group(mqttv5, Config) -> + [{proto_ver, v5} | Config]; +init_per_group(_GroupName, Config) -> + Config. -end_per_suite(Config) -> - emqx_cth_suite:stop(?config(apps, Config)). +end_per_group(gen_tcp_listener, Config) -> + emqx_cth_suite:stop(?config(group_apps, Config)); +end_per_group(socket_listener, Config) -> + emqx_cth_suite:stop(?config(group_apps, Config)); +end_per_group(_GroupName, _Config) -> + ok. init_per_testcase(_Case, Config) -> Config. @@ -92,20 +147,10 @@ end_per_testcase(_Case, _Config) -> emqx_config:put_zone_conf(default, [mqtt, clientid_override], disabled), ok. -%%-------------------------------------------------------------------- -%% Test cases for MQTT v3 -%%-------------------------------------------------------------------- - -t_basic_v3(_) -> - run_basic([{proto_ver, v3}]). - %%-------------------------------------------------------------------- %% Test cases for MQTT v4 %%-------------------------------------------------------------------- -t_basic_v4(_Config) -> - run_basic([{proto_ver, v4}]). 
- t_cm(_) -> emqx_config:put_zone_conf(default, [mqtt, idle_timeout], 1000), ClientId = atom_to_binary(?FUNCTION_NAME), @@ -271,18 +316,15 @@ t_dollar_topics(_) -> %% Test cases for MQTT v5 %%-------------------------------------------------------------------- -v5_conn_props(ReceiveMaximum) -> - [ - {proto_ver, v5}, - {properties, #{'Receive-Maximum' => ReceiveMaximum}} - ]. +v5_conn_props(ReceiveMaximum, Config) -> + [{properties, #{'Receive-Maximum' => ReceiveMaximum}} | Config]. -t_basic_with_props_v5(_) -> - run_basic(v5_conn_props(4)). +t_basic_with_props_v5(Config) -> + t_basic(v5_conn_props(4, Config)). -t_v5_receive_maximim_in_connack(_) -> +t_v5_receive_maximim_in_connack(Config) -> ReceiveMaximum = 7, - {ok, C} = emqtt:start_link(v5_conn_props(ReceiveMaximum)), + {ok, C} = emqtt:start_link(v5_conn_props(ReceiveMaximum, Config)), {ok, Props} = emqtt:connect(C), ?assertMatch(#{'Receive-Maximum' := ReceiveMaximum}, Props), ok = emqtt:disconnect(C), @@ -292,7 +334,7 @@ t_v5_receive_maximim_in_connack(_) -> %% General test cases. %%-------------------------------------------------------------------- -run_basic(Opts) -> +t_basic(Opts) -> Topic = <<"TopicA">>, {ok, C} = emqtt:start_link(Opts), {ok, _} = emqtt:connect(C), @@ -378,56 +420,46 @@ t_client_attr_from_user_property(_Config) -> ), emqtt:disconnect(Client). -t_sock_closed_reason_normal(_) -> - ProtoVers = [v3, v4, v5], +t_sock_closed_reason_normal(Config) -> ClientId = atom_to_binary(?FUNCTION_NAME), - [ - ?check_trace( - begin - {ok, C} = emqtt:start_link([{proto_ver, Ver}, {clientid, ClientId}]), - {ok, _} = emqtt:connect(C), - ?wait_async_action( - emqtt:disconnect(C), - #{?snk_kind := sock_closed_normal}, - 5_000 - ) - end, - fun(Trace0) -> - ?assertMatch([#{clientid := ClientId}], ?of_kind(sock_closed_normal, Trace0)), - ok - end - ) - || Ver <- ProtoVers - ]. 
+ ?check_trace( + begin + {ok, C} = emqtt:start_link([{clientid, ClientId} | Config]), + {ok, _} = emqtt:connect(C), + ?wait_async_action( + emqtt:disconnect(C), + #{?snk_kind := sock_closed_normal}, + 5_000 + ) + end, + fun(Trace0) -> + ?assertMatch([#{clientid := ClientId}], ?of_kind(sock_closed_normal, Trace0)), + ok + end + ). -t_sock_closed_force_closed_by_client(_) -> - ProtoVers = [v3, v4, v5], +t_sock_closed_force_closed_by_client(Config) -> ClientId = atom_to_binary(?FUNCTION_NAME), - process_flag(trap_exit, true), - [ - ?check_trace( - begin - {ok, C} = emqtt:start_link([{proto_ver, Ver}, {clientid, ClientId}]), - {ok, _} = emqtt:connect(C), - ?wait_async_action( - exit(C, kill), - #{?snk_kind := sock_closed_with_other_reason}, - 5_000 - ) - end, - fun(Trace0) -> - ?assertMatch( - [#{clientid := ClientId}], ?of_kind(sock_closed_with_other_reason, Trace0) - ), - ok - end - ) - || Ver <- ProtoVers - ], - process_flag(trap_exit, false). + ?check_trace( + begin + {ok, C} = emqtt:start_link([{clientid, ClientId} | Config]), + {ok, _} = emqtt:connect(C), + true = erlang:unlink(C), + ?wait_async_action( + exit(C, kill), + #{?snk_kind := sock_closed_with_other_reason}, + 5_000 + ) + end, + fun(Trace0) -> + ?assertMatch( + [#{clientid := ClientId}], ?of_kind(sock_closed_with_other_reason, Trace0) + ), + ok + end + ). 
t_clientid_override(_) -> - emqx_logger:set_log_level(debug), ClientId = <<"original-clientid-0">>, Username = <<"username1">>, Override = <<"username">>, diff --git a/apps/emqx/test/emqx_listeners_SUITE.erl b/apps/emqx/test/emqx_listeners_SUITE.erl index ec447696954..96a805b8898 100644 --- a/apps/emqx/test/emqx_listeners_SUITE.erl +++ b/apps/emqx/test/emqx_listeners_SUITE.erl @@ -176,6 +176,7 @@ t_tcp_frame_parsing_conn(_Config) -> Port = emqx_common_test_helpers:select_free_port(tcp), Conf = #{ <<"bind">> => format_bind({"127.0.0.1", Port}), + <<"tcp_backend">> => <<"gen_tcp">>, <<"parse_unit">> => <<"frame">> }, with_listener(tcp, ?FUNCTION_NAME, Conf, fun() -> @@ -187,6 +188,21 @@ t_tcp_frame_parsing_conn(_Config) -> ?assertMatch(#{parser := {frame, _Options}}, CState) end). +t_tcp_socket_conn(_Config) -> + Port = emqx_common_test_helpers:select_free_port(tcp), + Conf = #{ + <<"bind">> => format_bind({"127.0.0.1", Port}), + <<"tcp_backend">> => <<"socket">> + }, + with_listener(tcp, ?FUNCTION_NAME, Conf, fun() -> + Client = emqtt_connect_tcp({127, 0, 0, 1}, Port), + pong = emqtt:ping(Client), + ?assertEqual( + emqx_socket_connection, + emqx_cth_broker:connection_info(connmod, Client) + ) + end). + t_ssl_frame_parsing_conn(Config) -> PrivDir = ?config(priv_dir, Config), Port = emqx_common_test_helpers:select_free_port(ssl), diff --git a/apps/emqx/test/emqx_mqtt_protocol_v5_SUITE.erl b/apps/emqx/test/emqx_mqtt_protocol_v5_SUITE.erl index 0a24280457e..ef870a6facf 100644 --- a/apps/emqx/test/emqx_mqtt_protocol_v5_SUITE.erl +++ b/apps/emqx/test/emqx_mqtt_protocol_v5_SUITE.erl @@ -42,17 +42,20 @@ all() -> [ {group, tcp}, {group, tcp_beam_framing}, + {group, tcp_socket}, {group, ws}, {group, quic} ]. 
groups() -> - TCs = emqx_common_test_helpers:all(?MODULE), + TCsQuic = [t_connect_clean_start_unresp_old_client], + TCs = emqx_common_test_helpers:all(?MODULE) -- TCsQuic, [ - {tcp, [], TCs -- [t_connect_clean_start_unresp_old_client]}, - {tcp_beam_framing, [], TCs -- [t_connect_clean_start_unresp_old_client]}, - {ws, [], TCs -- [t_connect_clean_start_unresp_old_client]}, - {quic, [], TCs} + {tcp, [], TCs}, + {tcp_beam_framing, [], TCs}, + {tcp_socket, [], TCs}, + {ws, [], TCs}, + {quic, [], TCs ++ TCsQuic} ]. init_per_group(tcp, Config) -> @@ -64,6 +67,12 @@ init_per_group(tcp_beam_framing, Config) -> #{work_dir => emqx_cth_suite:work_dir(Config)} ), [{conn_type, tcp}, {port, 2883}, {conn_fun, connect}, {group_apps, Apps} | Config]; +init_per_group(tcp_socket, Config) -> + Apps = emqx_cth_suite:start( + [{emqx, "listeners.tcp.test { enable = true, bind = 2883, tcp_backend = socket }"}], + #{work_dir => emqx_cth_suite:work_dir(Config)} + ), + [{conn_type, tcp}, {port, 2883}, {conn_fun, connect}, {group_apps, Apps} | Config]; init_per_group(quic, Config) -> Apps = emqx_cth_suite:start( [{emqx, "listeners.quic.test { enable = true, bind = 1884 }"}], diff --git a/rel/i18n/emqx_schema.hocon b/rel/i18n/emqx_schema.hocon index 9a12ca3eb7e..f6d3e44b1be 100644 --- a/rel/i18n/emqx_schema.hocon +++ b/rel/i18n/emqx_schema.hocon @@ -613,6 +613,17 @@ Note, the choice of `parse_unit` affects the interpretation of the `active_n` se fields_mqtt_opts_parse_unit.label: """MQTT Message Parse Unit""" +fields_mqtt_opts_tcp_backend.desc: +"""Indicates which TCP backend should be used by the listener. + +- `gen_tcp`: Standard backend, in use since EMQX 5.0 release. + +- `socket`: Experimental backend, looking to improve message latency and compute resource usage. 
+ Note that some `tcp_options` settings will have no effect when using this backend, e.g.: `high_watermark` and `send_timeout_close`.""" + +fields_mqtt_opts_tcp_backend.label: +"""TCP Backend""" + server_ssl_opts_schema_honor_cipher_order.desc: """An important security setting. If this setting is enabled, the server will prioritize the cipher suites it prefers most from the list of cipher suites supported by the client, thus ignoring the client's preferences. From 960f9fb614d59f4455e2950f50047500c3a3bf8a Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Thu, 10 Jul 2025 19:21:29 +0200 Subject: [PATCH 14/31] test(conn): verify connmods work well with socket close and keepalive --- apps/emqx/src/emqx_connection.erl | 5 ++ apps/emqx/src/emqx_socket_connection.erl | 8 ++- apps/emqx/test/emqx_client_SUITE.erl | 84 +++++++++++++++++++++++- 3 files changed, 93 insertions(+), 4 deletions(-) diff --git a/apps/emqx/src/emqx_connection.erl b/apps/emqx/src/emqx_connection.erl index b888648df32..9e39b8b4988 100644 --- a/apps/emqx/src/emqx_connection.erl +++ b/apps/emqx/src/emqx_connection.erl @@ -288,6 +288,11 @@ stop(Pid) -> init(Parent, Transport, RawSocket, Options) -> case Transport:wait(RawSocket) of {ok, Socket} -> + ?tp(connection_started, #{ + socket => Socket, + listener => maps:get(listener, Options), + connmod => ?MODULE + }), run_loop(Parent, init_state(Transport, Socket, Options)); {error, Reason} -> ok = Transport:fast_close(RawSocket), diff --git a/apps/emqx/src/emqx_socket_connection.erl b/apps/emqx/src/emqx_socket_connection.erl index 35d96cf2c11..58e1e86283c 100644 --- a/apps/emqx/src/emqx_socket_connection.erl +++ b/apps/emqx/src/emqx_socket_connection.erl @@ -264,6 +264,11 @@ stop(Pid) -> init(Parent, esockd_socket, RawSocket, Options) -> case esockd_socket:wait(RawSocket) of {ok, Socket} -> + ?tp(connection_started, #{ + socket => Socket, + listener => maps:get(listener, Options), + connmod => ?MODULE + }), run_loop(Parent, init_state(Socket, Options)); 
{error, Reason} -> ok = esockd_socket:fast_close(RawSocket), @@ -321,7 +326,6 @@ init_state( ensure_ok_or_exit(Result, Sock) -> case Result of {error, Reason} when Reason =:= enotconn; Reason =:= closed -> - esockd_socket:fast_close(Sock), exit(normal); {error, Reason} -> esockd_socket:fast_close(Sock), @@ -346,7 +350,7 @@ run_loop( emqx_logger:set_metadata_peername(esockd:format(Peername)), ShutdownPolicy = emqx_config:get_zone_conf(Zone, [force_shutdown]), _ = emqx_utils:tune_heap_size(ShutdownPolicy), - ok = set_tcp_keepalive(Listener), + _ = set_tcp_keepalive(Listener), case sock_async_recv(Socket, 0) of {ok, Data} -> NState = start_idle_timer(State), diff --git a/apps/emqx/test/emqx_client_SUITE.erl b/apps/emqx/test/emqx_client_SUITE.erl index 11f50c5d77a..014e1c10712 100644 --- a/apps/emqx/test/emqx_client_SUITE.erl +++ b/apps/emqx/test/emqx_client_SUITE.erl @@ -29,10 +29,11 @@ groups() -> {group, mqttv4}, {group, mqttv5}, {group, others}, + {group, socket}, {group, misbehaving} ]}, {socket_listener, [], [ - {group, sock_closed}, + {group, socket}, {group, misbehaving} ]}, {mqttv3, [], [ @@ -73,10 +74,13 @@ groups() -> t_clientid_override_fail_with_expression_exception ]}, {misbehaving, [], [ + t_sock_closed_instantly, + t_sock_closed_quickly, t_sub_non_utf8_topic, t_congestion_send_timeout ]}, - {sock_closed, [], [ + {socket, [], [ + t_sock_keepalive, t_sock_closed_reason_normal, t_sock_closed_force_closed_by_client ]} @@ -136,15 +140,18 @@ end_per_group(_GroupName, _Config) -> ok. init_per_testcase(_Case, Config) -> + ok = snabbkaffe:start_trace(), Config. 
end_per_testcase(_Case, _Config) -> + ok = snabbkaffe:stop(), %% restore default values emqx_config:put_zone_conf(default, [mqtt, idle_timeout], 15000), emqx_config:put_zone_conf(default, [mqtt, use_username_as_clientid], false), emqx_config:put_zone_conf(default, [mqtt, peer_cert_as_clientid], disabled), emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], []), emqx_config:put_zone_conf(default, [mqtt, clientid_override], disabled), + emqx_config:put_listener_conf(tcp, default, [tcp_options, keepalive], "none"), ok. %%-------------------------------------------------------------------- @@ -420,6 +427,23 @@ t_client_attr_from_user_property(_Config) -> ), emqtt:disconnect(Client). +t_sock_keepalive(Config) -> + %% Configure TCP Keepalive: + ok = emqx_config:put_listener_conf(tcp, default, [tcp_options, keepalive], "1,1,5"), + %% Connect MQTT client: + ClientId = atom_to_binary(?FUNCTION_NAME), + {ok, C} = emqtt:start_link([{clientid, ClientId} | Config]), + { + {ok, _}, + {ok, #{?snk_meta := #{pid := CPid}}} + } = ?wait_async_action(emqtt:connect(C), #{?snk_kind := connection_started}), + %% Verify TCP settings handled smoothly: + %% Whether actual keepalive probes are being sent is notoriously difficult to verify. + MRef = erlang:monitor(process, CPid), + ok = timer:sleep(1_000), + ok = emqtt:disconnect(C), + ?assertReceive({'DOWN', MRef, process, CPid, normal}).
+ t_sock_closed_reason_normal(Config) -> ClientId = atom_to_binary(?FUNCTION_NAME), ?check_trace( @@ -535,6 +559,58 @@ on_hook(_ClientInfo, ConnInfo, 'client.connected' = HP, Pid) -> %% Misbehaving clients %%-------------------------------------------------------------------- +t_sock_closed_instantly(_) -> + %% Introduce scheduling delays: + meck:new(esockd_transport, [no_history, passthrough]), + meck:new(esockd_socket, [no_history, passthrough]), + meck:expect(esockd_transport, type, fun meck_sched_delay/1), + meck:expect(esockd_socket, type, fun meck_sched_delay/1), + %% Start a tracing session, to catch exit reasons consistently: + TS = trace:session_create(?MODULE, self(), []), + %% Establish a connection: + { + {ok, Socket}, + {ok, #{?snk_meta := #{pid := CPid}}} + } = ?wait_async_action( + gen_tcp:connect({127, 0, 0, 1}, 1883, [{active, true}, binary]), + #{?snk_kind := connection_started} + ), + %% Verify it handles instant socket close smoothly: + trace:process(TS, CPid, true, [procs]), + try + ok = gen_tcp:close(Socket), + ?assertReceive( + {trace, CPid, exit, Reason} when + Reason == {shutdown, tcp_closed} orelse Reason == normal + ) + after + trace:session_destroy(TS), + meck:unload() + end. + +t_sock_closed_quickly(_) -> + %% Start a tracing session: + TS = trace:session_create(?MODULE, self(), []), + %% Establish a connection: + { + {ok, Socket}, + {ok, #{?snk_meta := #{pid := CPid}}} + } = ?wait_async_action( + gen_tcp:connect({127, 0, 0, 1}, 1883, [{active, true}, binary]), + #{?snk_kind := connection_started} + ), + %% Verify it handles quick socket close smoothly: + trace:process(TS, CPid, true, [procs]), + try + ok = gen_tcp:close(Socket), + ?assertReceive( + {trace, CPid, exit, Reason} when + Reason == {shutdown, tcp_closed} orelse Reason == normal + ) + after + trace:session_destroy(TS) + end.
+ t_sub_non_utf8_topic(_) -> {ok, Socket} = gen_tcp:connect({127, 0, 0, 1}, 1883, [{active, true}, binary]), ConnPacket = ?CONNECT_PACKET(#mqtt_packet_connect{clientid = <<"abcdefg">>}), @@ -659,3 +735,7 @@ tls_certcn_as_clientid(TLSVsn, RequiredTLSVsn) -> #{clientinfo := #{clientid := CN}} = emqx_cm:get_chan_info(CN), confirm_tls_version(Client, RequiredTLSVsn), emqtt:disconnect(Client). + +meck_sched_delay(X) -> + erlang:yield(), + meck:passthrough([X]). From f69c8772b12d29d512f08ea383de3e11f65f5fbf Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Thu, 10 Jul 2025 19:44:09 +0200 Subject: [PATCH 15/31] perf(frame): optimize away and inline few serialization routines --- apps/emqx/src/emqx_frame.erl | 144 +++++++++++++++++++---------------- 1 file changed, 80 insertions(+), 64 deletions(-) diff --git a/apps/emqx/src/emqx_frame.erl b/apps/emqx/src/emqx_frame.erl index 5694d51e70f..6e3a6dc4289 100644 --- a/apps/emqx/src/emqx_frame.erl +++ b/apps/emqx/src/emqx_frame.erl @@ -74,8 +74,6 @@ -define(MULTIPLIER_MAX, 16#200000). --dialyzer({no_match, [serialize_utf8_string/3]}). - %% @doc Describe state for logging. describe_state(Options = #options{}) -> #{ @@ -777,9 +775,11 @@ serialize_pkt(Packet, #{version := Ver, max_size := MaxSize, strict_mode := Stri end. -spec serialize(emqx_types:packet()) -> iodata(). -serialize(Packet) -> serialize(Packet, ?MQTT_PROTO_V4, false). +serialize(Packet) -> + serialize(Packet, ?MQTT_PROTO_V4, false). -serialize(Packet, Ver) -> serialize(Packet, Ver, false). +serialize(Packet, Ver) -> + serialize(Packet, Ver, false). -spec serialize(emqx_types:packet(), emqx_types:proto_ver(), boolean()) -> iodata(). serialize( @@ -791,34 +791,44 @@ serialize( Ver, StrictMode ) -> - serialize( - Header, - serialize_variable(Variable, Ver, StrictMode), - serialize_payload(Payload), - StrictMode - ). 
- -serialize( - #mqtt_packet_header{ - type = Type, - dup = Dup, - qos = QoS, - retain = Retain - }, - VariableBin, - PayloadBin, - _StrictMode -) when - ?CONNECT =< Type andalso Type =< ?AUTH --> - Len = iolist_size(VariableBin) + iolist_size(PayloadBin), + VariableBin = serialize_variable(Variable, Ver, StrictMode), + PayloadBin = serialize_payload(Payload), + RemLen = iolist_size(VariableBin) + iolist_size(PayloadBin), [ - <>, - serialize_remaining_len(Len), + serialize_header(Header), + serialize_remaining_len(RemLen), VariableBin, PayloadBin ]. +-compile( + {inline, [ + serialize_header/1, + serialize_payload/1, + serialize_remaining_len/1, + serialize_variable_byte_integer/1 + ]} +). + +%% erlfmt-ignore +-define(bool(B), (case B of true -> 1; false -> 0; undefined -> 0 end):1). + +%% erlfmt-ignore +-define(utf8string(X, STRICT), + (begin + true = (case STRICT of + true -> byte_size(unicode:characters_to_binary(X)); + false -> byte_size(X) + end =< 16#ffff), + byte_size(X) + end):16/big-unsigned-integer, X/bytes +). + +serialize_header( + #mqtt_packet_header{type = Type, dup = Dup, qos = QoS, retain = Retain} +) when ?CONNECT =< Type andalso Type =< ?AUTH -> + <>. 
+ serialize_variable( #mqtt_packet_connect{ proto_name = ProtoName, @@ -859,19 +869,25 @@ serialize_variable( KeepAlive:16/big-unsigned-integer >>, serialize_properties(Properties, ProtoVer, StrictMode), - serialize_utf8_string(ClientId, StrictMode), + <>, case WillFlag of true -> [ serialize_properties(WillProps, ProtoVer, StrictMode), - serialize_utf8_string(WillTopic, StrictMode), + <>, serialize_binary_data(WillPayload) ]; false -> <<>> end, - serialize_utf8_string(Username, true, StrictMode), - serialize_utf8_string(Password, true, StrictMode) + case Username of + undefined -> <<>>; + _ -> <> + end, + case Password of + undefined -> <<>>; + _ -> <> + end ]; serialize_variable( #mqtt_packet_connack{ @@ -893,7 +909,7 @@ serialize_variable( StrictMode ) -> [ - serialize_utf8_string(TopicName, StrictMode), + <>, case PacketId of undefined -> <<>>; _ -> <> @@ -1036,9 +1052,9 @@ serialize_property('Payload-Format-Indicator', Val, _StrictMode) -> serialize_property('Message-Expiry-Interval', Val, _StrictMode) -> <<16#02, Val:32/big>>; serialize_property('Content-Type', Val, StrictMode) -> - <<16#03, (serialize_utf8_string(Val, StrictMode))/binary>>; + <<16#03, ?utf8string(Val, StrictMode)>>; serialize_property('Response-Topic', Val, StrictMode) -> - <<16#08, (serialize_utf8_string(Val, StrictMode))/binary>>; + <<16#08, ?utf8string(Val, StrictMode)>>; serialize_property('Correlation-Data', Val, _StrictMode) -> <<16#09, (byte_size(Val)):16, Val/binary>>; serialize_property('Subscription-Identifier', Val, _StrictMode) -> @@ -1046,11 +1062,11 @@ serialize_property('Subscription-Identifier', Val, _StrictMode) -> serialize_property('Session-Expiry-Interval', Val, _StrictMode) -> <<16#11, Val:32/big>>; serialize_property('Assigned-Client-Identifier', Val, StrictMode) -> - <<16#12, (serialize_utf8_string(Val, StrictMode))/binary>>; + <<16#12, ?utf8string(Val, StrictMode)>>; serialize_property('Server-Keep-Alive', Val, _StrictMode) -> <<16#13, Val:16/big>>; 
serialize_property('Authentication-Method', Val, StrictMode) -> - <<16#15, (serialize_utf8_string(Val, StrictMode))/binary>>; + <<16#15, ?utf8string(Val, StrictMode)>>; serialize_property('Authentication-Data', Val, _StrictMode) -> <<16#16, (iolist_size(Val)):16, Val/binary>>; serialize_property('Request-Problem-Information', Val, _StrictMode) -> @@ -1060,11 +1076,11 @@ serialize_property('Will-Delay-Interval', Val, _StrictMode) -> serialize_property('Request-Response-Information', Val, _StrictMode) -> <<16#19, Val>>; serialize_property('Response-Information', Val, StrictMode) -> - <<16#1A, (serialize_utf8_string(Val, StrictMode))/binary>>; + <<16#1A, ?utf8string(Val, StrictMode)>>; serialize_property('Server-Reference', Val, StrictMode) -> - <<16#1C, (serialize_utf8_string(Val, StrictMode))/binary>>; + <<16#1C, ?utf8string(Val, StrictMode)>>; serialize_property('Reason-String', Val, StrictMode) -> - <<16#1F, (serialize_utf8_string(Val, StrictMode))/binary>>; + <<16#1F, ?utf8string(Val, StrictMode)>>; serialize_property('Receive-Maximum', Val, _StrictMode) -> <<16#21, Val:16/big>>; serialize_property('Topic-Alias-Maximum', Val, _StrictMode) -> @@ -1094,7 +1110,7 @@ serialize_property('Shared-Subscription-Available', Val, _StrictMode) -> serialize_topic_filters(subscribe, TopicFilters, ?MQTT_PROTO_V5, StrictMode) -> << << - (serialize_utf8_string(Topic, StrictMode))/binary, + ?utf8string(Topic, StrictMode), ?RESERVED:2, Rh:2, (flag(Rap)):1, @@ -1105,11 +1121,11 @@ serialize_topic_filters(subscribe, TopicFilters, ?MQTT_PROTO_V5, StrictMode) -> >>; serialize_topic_filters(subscribe, TopicFilters, _Ver, StrictMode) -> << - <<(serialize_utf8_string(Topic, StrictMode))/binary, ?RESERVED:6, QoS:2>> + <> || {Topic, #{qos := QoS}} <- TopicFilters >>; serialize_topic_filters(unsubscribe, TopicFilters, _Ver, StrictMode) -> - <<<<(serialize_utf8_string(Topic, StrictMode))/binary>> || Topic <- TopicFilters>>. + <<<> || Topic <- TopicFilters>>. 
serialize_reason_codes(undefined) -> <<>>; @@ -1117,36 +1133,36 @@ serialize_reason_codes(ReasonCodes) when is_list(ReasonCodes) -> <<<> || Code <- ReasonCodes>>. serialize_utf8_pair(Name, Value, StrictMode) -> - << - (serialize_utf8_string(Name, StrictMode))/binary, - (serialize_utf8_string(Value, StrictMode))/binary - >>. + <>. serialize_binary_data(Bin) -> [<<(byte_size(Bin)):16/big-unsigned-integer>>, Bin]. -serialize_utf8_string(undefined, false, _StrictMode) -> - ?SERIALIZE_ERR(utf8_string_undefined); -serialize_utf8_string(undefined, true, _StrictMode) -> - <<>>; -serialize_utf8_string(String, _AllowNull, StrictMode) -> - serialize_utf8_string(String, StrictMode). - -serialize_utf8_string(String, true) -> - StringBin = unicode:characters_to_binary(String), - serialize_utf8_string(StringBin, false); -serialize_utf8_string(String, false) -> - Len = byte_size(String), - true = (Len =< 16#ffff), - <>. - serialize_remaining_len(I) -> serialize_variable_byte_integer(I). -serialize_variable_byte_integer(N) when N =< ?LOWBITS -> +serialize_variable_byte_integer(N) when N < (1 bsl 7) -> <<0:1, N:7>>; -serialize_variable_byte_integer(N) -> - <<1:1, (N rem ?HIGHBIT):7, (serialize_variable_byte_integer(N div ?HIGHBIT))/binary>>. +serialize_variable_byte_integer(N) when N < (1 bsl 14) -> + <<1:1, (N band 2#1111111):7, (N bsr 7):8>>; +serialize_variable_byte_integer(N) when N < (1 bsl 21) -> + << + 1:1, + (N band 2#1111111):7, + 1:1, + ((N bsr 7) band 2#1111111):7, + (N bsr 14):8 + >>; +serialize_variable_byte_integer(N) when N < (1 bsl 28) -> + << + 1:1, + (N band 2#1111111):7, + 1:1, + ((N bsr 7) band 2#1111111):7, + 1:1, + ((N bsr 14) band 2#1111111):7, + (N bsr 21):8 + >>. %% Is the frame too large? -spec is_too_large(iodata(), pos_integer()) -> boolean(). 
From 13a081b2e0293f6e2bf4dfdd24d83a20a4dd5168 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Thu, 10 Jul 2025 20:18:15 +0200 Subject: [PATCH 16/31] chore(socket): fix dialyzer complaints --- apps/emqx/src/emqx_socket_connection.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/emqx/src/emqx_socket_connection.erl b/apps/emqx/src/emqx_socket_connection.erl index 58e1e86283c..f739607630d 100644 --- a/apps/emqx/src/emqx_socket_connection.erl +++ b/apps/emqx/src/emqx_socket_connection.erl @@ -106,7 +106,7 @@ -record(congested, { handle :: reference(), deadline :: _TimestampMs :: integer(), - sendq :: [erlang:iovec()] + sendq :: [erlang:iodata()] }). -type congested() :: #congested{}. @@ -941,7 +941,7 @@ serialize_and_inc_stats(#state{serialize = Serialize}, Packet) -> send(Num, IoData, #state{socket = Socket, sockstate = idle} = State) -> Oct = iolist_size(IoData), Handle = make_ref(), - case socket:send(Socket, IoData, Handle) of + case socket:send(Socket, IoData, [], Handle) of ok -> sent(Num, Oct, State); {select, {_Info, Rest}} -> @@ -969,7 +969,7 @@ send(_Num, _IoVec, #state{sockstate = closed} = State) -> handle_send_ready(Socket, SS = #congested{sendq = SQ}, State) -> IoData = sendq_to_iodata(SQ, []), Handle = make_ref(), - case socket:send(Socket, IoData, Handle) of + case socket:send(Socket, IoData, [], Handle) of ok -> {ok, State}; {select, {_Info, Rest}} -> From 3a3ce168720f9032e736b5d9342701fd7809155e Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Thu, 10 Jul 2025 20:42:03 +0200 Subject: [PATCH 17/31] chore: add changelog entry --- changes/ee/perf-15451.en.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/ee/perf-15451.en.md diff --git a/changes/ee/perf-15451.en.md b/changes/ee/perf-15451.en.md new file mode 100644 index 00000000000..57d579050d3 --- /dev/null +++ b/changes/ee/perf-15451.en.md @@ -0,0 +1 @@ +Introduce experimental `socket` backend for TCP listeners, designed to improve message 
processing latency and reduce compute resource usage. This can be enabled via the new `tcp_backend` listener option. From 242148f0df152b08bd5ce704f6ccfe200084ea76 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 16 Jul 2025 10:19:12 +0200 Subject: [PATCH 18/31] chore(socket): drop unnecessary directive --- apps/emqx/src/emqx_socket_connection.erl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/apps/emqx/src/emqx_socket_connection.erl b/apps/emqx/src/emqx_socket_connection.erl index f739607630d..0aa1b8e70f8 100644 --- a/apps/emqx/src/emqx_socket_connection.erl +++ b/apps/emqx/src/emqx_socket_connection.erl @@ -4,6 +4,10 @@ %% This module interacts with the transport layer of MQTT %% Transport: esockd_socket. +%% +%% NOTE +%% When changing this module, please make an effort to port changes to +%% `emqx_connection` module if they make sense there, and vice versa. -module(emqx_socket_connection). -include("emqx.hrl"). @@ -20,7 +24,6 @@ -endif. -elvis([{elvis_style, used_ignored_variable, disable}]). --elvis([{elvis_style, invalid_dynamic_call, #{ignore => [emqx_connection]}}]). 
%% API -export([ From 6dcae1cc5118a215a348b1c49ce2967dbe84ec2a Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Thu, 17 Jul 2025 16:45:47 +0200 Subject: [PATCH 19/31] fix(socket): avoid closing already closed socket --- apps/emqx/src/emqx_socket_connection.erl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/apps/emqx/src/emqx_socket_connection.erl b/apps/emqx/src/emqx_socket_connection.erl index 0aa1b8e70f8..2070f94aaaa 100644 --- a/apps/emqx/src/emqx_socket_connection.erl +++ b/apps/emqx/src/emqx_socket_connection.erl @@ -1055,7 +1055,7 @@ handle_info({sock_error, Reason}, State) -> true -> ?SLOG(warning, #{msg => "socket_error", reason => Reason}); false -> ok end, - handle_info({sock_closed, Reason}, close_socket(State)); + handle_info({sock_closed, Reason}, ensure_close_socket(Reason, State)); %% handle QUIC control stream events handle_info({quic, Event, Handle, Prop}, State) when is_atom(Event) -> case emqx_quic_stream:Event(Handle, Prop, State) of @@ -1130,6 +1130,11 @@ check_oom(Pubs, Bytes, State = #state{zone = Zone}) -> %%-------------------------------------------------------------------- %% Close Socket +ensure_close_socket(closed, State) -> + socket_closed(State); +ensure_close_socket(_Reason, State) -> + close_socket(State). 
+ close_socket(State = #state{sockstate = closed}) -> State; close_socket(State = #state{socket = Socket}) -> From 3a813744e7abd3c6f076018d57d0fd104d7f0a0f Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 18 Jul 2025 12:41:38 +0200 Subject: [PATCH 20/31] fix(socket): anticipate > 1 socket `abort`s on socket close --- apps/emqx/src/emqx_socket_connection.erl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/apps/emqx/src/emqx_socket_connection.erl b/apps/emqx/src/emqx_socket_connection.erl index 2070f94aaaa..5bcf13cc7ca 100644 --- a/apps/emqx/src/emqx_socket_connection.erl +++ b/apps/emqx/src/emqx_socket_connection.erl @@ -550,8 +550,14 @@ handle_msg({'$socket', Socket, select, Handle}, State = #state{sockstate = SS}) _ -> handle_data_ready(Socket, State) end; -handle_msg({'$socket', _Socket, abort, {_Handle, Reason}}, State) -> - handle_info({sock_error, Reason}, State); +handle_msg({'$socket', _Socket, abort, {_Handle, Reason}}, State = #state{sockstate = SS}) -> + case SS =/= closed of + true -> + handle_info({sock_error, Reason}, State); + false -> + %% In case there were more than 1 outstanding select: + {ok, State} + end; handle_msg({recv, Data}, State) -> handle_data(Data, false, State); handle_msg({recv_more, Data}, State) -> From 34309434b5b12b527ba89cd19e8abb73109d3558 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 18 Jul 2025 12:43:19 +0200 Subject: [PATCH 21/31] chore(socket): correct typespec --- apps/emqx/src/emqx_socket_connection.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/emqx/src/emqx_socket_connection.erl b/apps/emqx/src/emqx_socket_connection.erl index 5bcf13cc7ca..bdb2012ca3f 100644 --- a/apps/emqx/src/emqx_socket_connection.erl +++ b/apps/emqx/src/emqx_socket_connection.erl @@ -946,7 +946,8 @@ serialize_and_inc_stats(#state{serialize = Serialize}, Packet) -> %%-------------------------------------------------------------------- %% Send data --spec 
send(non_neg_integer(), iodata(), state()) -> {ok, state()}. +-spec send(non_neg_integer(), iodata(), state()) -> + {ok, state()} | {ok, {sock_error, _Reason}, state()}. send(Num, IoData, #state{socket = Socket, sockstate = idle} = State) -> Oct = iolist_size(IoData), Handle = make_ref(), From b3d1f5e6fe7b471d1b29466c7658486826e6ea0d Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 18 Jul 2025 17:10:42 +0200 Subject: [PATCH 22/31] fix(socket): preserve select handle + handle decongestion correctly --- apps/emqx/src/emqx_socket_connection.erl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/apps/emqx/src/emqx_socket_connection.erl b/apps/emqx/src/emqx_socket_connection.erl index bdb2012ca3f..96cd2495c97 100644 --- a/apps/emqx/src/emqx_socket_connection.erl +++ b/apps/emqx/src/emqx_socket_connection.erl @@ -981,13 +981,15 @@ handle_send_ready(Socket, SS = #congested{sendq = SQ}, State) -> Handle = make_ref(), case socket:send(Socket, IoData, [], Handle) of ok -> - {ok, State}; + NState = State#state{sockstate = idle}, + {ok, NState}; {select, {_Info, Rest}} -> %% Partially accepted, renew deadline. {ok, queue_send(Handle, Rest, State)}; {select, _Info} -> %% Totally congested, keep the deadline. - NState = State#state{sockstate = SS#congested{sendq = IoData}}, + NSS = SS#congested{handle = Handle, sendq = IoData}, + NState = State#state{sockstate = NSS}, {ok, NState}; {error, {Reason, _Rest}} -> %% Defer error handling: @@ -996,7 +998,7 @@ handle_send_ready(Socket, SS = #congested{sendq = SQ}, State) -> {ok, {sock_error, Reason}, State} end. 
-queue_send(Handle, IoData, State = #state{sockstate = idle, listener = {Type, Name}}) -> +queue_send(Handle, IoData, State = #state{listener = {Type, Name}}) -> Timeout = emqx_config:get_listener_conf(Type, Name, [tcp_options, send_timeout], 15_000), Deadline = erlang:monotonic_time(millisecond) + Timeout, SockState = #congested{handle = Handle, deadline = Deadline, sendq = [IoData]}, From a724a6502c38b4b27a7d882208bb89c41cac83d5 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 18 Jul 2025 17:11:52 +0200 Subject: [PATCH 23/31] test(client): add congested-then-decongested client testcase --- apps/emqx/test/emqx_client_SUITE.erl | 90 +++++++++++++++++++++++++++- 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/apps/emqx/test/emqx_client_SUITE.erl b/apps/emqx/test/emqx_client_SUITE.erl index 014e1c10712..435dd96f5de 100644 --- a/apps/emqx/test/emqx_client_SUITE.erl +++ b/apps/emqx/test/emqx_client_SUITE.erl @@ -77,7 +77,8 @@ groups() -> t_sock_closed_instantly, t_sock_closed_quickly, t_sub_non_utf8_topic, - t_congestion_send_timeout + t_congestion_send_timeout, + t_congestion_decongested ]}, {socket, [], [ t_sock_keepalive, @@ -102,6 +103,8 @@ init_per_group(gen_tcp_listener, Config) -> "listeners.tcp.default.tcp_options.sndbuf = 4KB\n" "listeners.tcp.default.tcp_options.recbuf = 4KB\n" "listeners.tcp.default.tcp_options.high_watermark = 160KB\n" + %% t_congestion_decongested + "conn_congestion.min_alarm_sustain_duration = 0\n" %% others "listeners.ssl.default.ssl_options.verify = verify_peer\n"} ], @@ -117,6 +120,8 @@ init_per_group(socket_listener, Config) -> "listeners.tcp.default.tcp_options.send_timeout = 2500\n" "listeners.tcp.default.tcp_options.sndbuf = 4KB\n" "listeners.tcp.default.tcp_options.recbuf = 4KB\n" + %% t_congestion_decongested + "conn_congestion.min_alarm_sustain_duration = 0\n" %% others "listeners.ssl.default.ssl_options.verify = verify_peer\n"} ], @@ -695,6 +700,89 @@ t_congestion_send_timeout(_) -> ?assertReceive({'DOWN', 
MRef, process, ConnPid, {shutdown, send_timeout}}, 5_000), ok = gen_tcp:close(Socket). +t_congestion_decongested(_) -> + ok = emqx_config:put_zone_conf(default, [mqtt, idle_timeout], 1000), + {ok, Socket} = gen_tcp:connect({127, 0, 0, 1}, 1883, [{active, false}, binary]), + %% Send manually constructed CONNECT: + ok = gen_tcp:send( + Socket, + emqx_frame:serialize( + ?CONNECT_PACKET(#mqtt_packet_connect{clientid = <<"t_congestion_decongested">>}) + ) + ), + {ok, Frames1} = gen_tcp:recv(Socket, 0, 1000), + {Pkt1, <<>>, Parser1} = emqx_frame:parse(Frames1, emqx_frame:initial_parse_state()), + ?assertMatch(?CONNACK_PACKET(0), Pkt1), + %% Send manually constructed SUBSCRIBE to subscribe to "t": + Topic = <<"t">>, + ok = gen_tcp:send( + Socket, + emqx_frame:serialize( + ?SUBSCRIBE_PACKET(1, [{Topic, #{rh => 0, rap => 0, nl => 0, qos => 0}}]) + ) + ), + {ok, Frames2} = gen_tcp:recv(Socket, 0, 1000), + {Pkt2, <<>>, _Parser2} = emqx_frame:parse(Frames2, Parser1), + ?assertMatch(?SUBACK_PACKET(1, [0]), Pkt2), + %% Subscribe to alarms: + ok = emqx_broker:subscribe(<<"$SYS/brokers/+/alarms/activate">>), + ok = emqx_broker:subscribe(<<"$SYS/brokers/+/alarms/deactivate">>), + %% Start filling up send buffers: + Publisher = fun Publisher(N) -> + %% Each message has 8000 bytes payload: + Payload = binary:copy(<<N:64>>, 1000), + _ = emqx:publish(emqx_message:make(<<"publisher">>, Topic, Payload)), + ok = timer:sleep(50), + Publisher(N + 1) + end, + PublisherPid = spawn_link(fun() -> Publisher(1) end), + %% Start consumer, initially paused: + Consumer = fun + Consumer(paused) -> + receive + activate -> + Consumer(active) + after 5_000 -> + exit(activate_timeout) + end; + Consumer(active) -> + case gen_tcp:recv(Socket, 0, 1000) of + {ok, _Bytes} -> + Consumer(active); + {error, timeout} -> + Consumer(active); + {error, closed} -> + exit(closed) + end + end, + ConsumerPid = spawn_link(fun() -> Consumer(paused) end), + %% Congestion alarm should be raised soon: + {deliver, _,
AlarmActivated} = + ?assertReceive({deliver, <<"$SYS/brokers/+/alarms/activate">>, _}, 5_000), + ?assertMatch( + #{<<"name">> := <<"conn_congestion/t_congestion_decongested/undefined">>}, + emqx_utils_json:decode(emqx_message:payload(AlarmActivated)) + ), + %% Activate consumer, congestion should resolve soon: + ConsumerPid ! activate, + {deliver, _, AlarmDeactivated} = + ?assertReceive({deliver, <<"$SYS/brokers/+/alarms/deactivate">>, _}, 5_000), + ?assertMatch( + #{<<"name">> := <<"conn_congestion/t_congestion_decongested/undefined">>}, + emqx_utils_json:decode(emqx_message:payload(AlarmDeactivated)) + ), + %% Connection should be alive and well: + ?assertMatch( + SS when SS == idle; SS == running, + emqx_cth_broker:connection_info(sockstate, <<"t_congestion_decongested">>) + ), + %% Cleanup: + true = unlink(PublisherPid), + true = unlink(ConsumerPid), + exit(PublisherPid, shutdown), + exit(ConsumerPid, shutdown), + ok = gen_tcp:close(Socket). + %%-------------------------------------------------------------------- %% Helper functions %%-------------------------------------------------------------------- From 8bb14d97618bd9102c466ea1fe4df4d034926c9e Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Fri, 18 Jul 2025 17:13:30 +0200 Subject: [PATCH 24/31] chore(socket): annotate log events with connmod for observability --- apps/emqx/src/emqx_socket_connection.erl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/apps/emqx/src/emqx_socket_connection.erl b/apps/emqx/src/emqx_socket_connection.erl index 96cd2495c97..d1a901256ad 100644 --- a/apps/emqx/src/emqx_socket_connection.erl +++ b/apps/emqx/src/emqx_socket_connection.erl @@ -349,8 +349,10 @@ run_loop( zone = Zone } ) -> - Peername = emqx_channel:info(peername, Channel), - emqx_logger:set_metadata_peername(esockd:format(Peername)), + emqx_logger:set_proc_metadata(#{ + peername => esockd:format(emqx_channel:info(peername, Channel)), + connmod => ?MODULE + }), ShutdownPolicy = 
emqx_config:get_zone_conf(Zone, [force_shutdown]), _ = emqx_utils:tune_heap_size(ShutdownPolicy), _ = set_tcp_keepalive(Listener), From 7b8dcbcd9db508397cc4c6f96d5a7766d92ed620 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 22 Jul 2025 14:00:09 +0200 Subject: [PATCH 25/31] chore(frame): avoid using 0 to signal "some more" bytes expected --- apps/emqx/src/emqx_frame.erl | 8 ++++---- apps/emqx/src/emqx_socket_connection.erl | 2 ++ apps/emqx/test/emqx_frame_SUITE.erl | 4 ++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/apps/emqx/src/emqx_frame.erl b/apps/emqx/src/emqx_frame.erl index 6e3a6dc4289..90d99ce0802 100644 --- a/apps/emqx/src/emqx_frame.erl +++ b/apps/emqx/src/emqx_frame.erl @@ -56,8 +56,8 @@ -type parse_state_initial() :: #options{}. -type parse_result() :: - %% Need more bytes out of stream, `0` means it's unclear how much more. - {_NeedMore :: non_neg_integer(), parse_state()} + %% Need more bytes out of stream. + {some_more | _NBytesMore :: pos_integer(), parse_state()} %% There's a full packet. | {emqx_types:packet(), binary(), parse_state_initial()}. @@ -144,7 +144,7 @@ parse( ) -> parse_body_frame(Bin, Header, Need, Body, Options); parse(<<>>, State) -> - {0, State}. + {some_more, State}. %% @doc Parses _complete_ binary frame into a single `#mqtt_packet{}`. -spec parse_complete(iodata(), parse_state_initial()) -> @@ -170,7 +170,7 @@ parse_complete( end. 
parse_remaining_len(<<>>, Header, Mult, Length, Options) -> - {_NeedMore = 0, #remlen{hdr = Header, len = Length, mult = Mult, opts = Options}}; + {some_more, #remlen{hdr = Header, len = Length, mult = Mult, opts = Options}}; parse_remaining_len(<<0:8, Rest/binary>>, Header, 1, 0, Options) -> Packet = parse_bodyless_packet(Header), {Packet, Rest, Options}; diff --git a/apps/emqx/src/emqx_socket_connection.erl b/apps/emqx/src/emqx_socket_connection.erl index d1a901256ad..0c1e505ed33 100644 --- a/apps/emqx/src/emqx_socket_connection.erl +++ b/apps/emqx/src/emqx_socket_connection.erl @@ -428,6 +428,8 @@ wakeup_from_hib(Parent, State) -> -compile({inline, [sock_async_recv/2]}). +sock_async_recv(Socket, some_more) -> + socket:recv(Socket, 0, [], nowait); sock_async_recv(Socket, Len) -> socket:recv(Socket, Len, [], nowait). diff --git a/apps/emqx/test/emqx_frame_SUITE.erl b/apps/emqx/test/emqx_frame_SUITE.erl index 1344968c5f0..4149b2acef7 100644 --- a/apps/emqx/test/emqx_frame_SUITE.erl +++ b/apps/emqx/test/emqx_frame_SUITE.erl @@ -125,8 +125,8 @@ t_parse_cont(_) -> Packet = ?CONNECT_PACKET(#mqtt_packet_connect{}), ParseState = emqx_frame:initial_parse_state(), <<HdrBin:1/binary, LenBin:1/binary, RestBin/binary>> = serialize_to_binary(Packet), - {0, ContParse} = emqx_frame:parse(<<>>, ParseState), - {0, ContParse1} = emqx_frame:parse(HdrBin, ContParse), + {some_more, ContParse} = emqx_frame:parse(<<>>, ParseState), + {some_more, ContParse1} = emqx_frame:parse(HdrBin, ContParse), {12, ContParse2} = emqx_frame:parse(LenBin, ContParse1), {12, ContParse3} = emqx_frame:parse(<<>>, ContParse2), {Packet, <<>>, _} = emqx_frame:parse(RestBin, ContParse3).
From e66c98c528a49a38f347786094d1a17beab2c7d7 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 22 Jul 2025 14:23:14 +0200 Subject: [PATCH 26/31] chore(socket): drop dead copy-paste artifacts Co-authored-by: zmstone --- apps/emqx/src/emqx_socket_connection.erl | 8 -------- 1 file changed, 8 deletions(-) diff --git a/apps/emqx/src/emqx_socket_connection.erl b/apps/emqx/src/emqx_socket_connection.erl index 0c1e505ed33..64d518f8068 100644 --- a/apps/emqx/src/emqx_socket_connection.erl +++ b/apps/emqx/src/emqx_socket_connection.erl @@ -1069,14 +1069,6 @@ handle_info({sock_error, Reason}, State) -> false -> ok end, handle_info({sock_closed, Reason}, ensure_close_socket(Reason, State)); -%% handle QUIC control stream events -handle_info({quic, Event, Handle, Prop}, State) when is_atom(Event) -> - case emqx_quic_stream:Event(Handle, Prop, State) of - {{continue, Msgs}, NewState} -> - {ok, Msgs, NewState}; - Other -> - Other - end; handle_info(Info, State) -> with_channel(handle_info, [Info], State). From 9a26fb3dd3d2658ae4c67dd259ef9bd4ce5ac558 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 22 Jul 2025 14:08:13 +0200 Subject: [PATCH 27/31] chore(conn): mention related `emqx_socket_connection` in header --- apps/emqx/src/emqx_connection.erl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/apps/emqx/src/emqx_connection.erl b/apps/emqx/src/emqx_connection.erl index 9e39b8b4988..cadd04b882e 100644 --- a/apps/emqx/src/emqx_connection.erl +++ b/apps/emqx/src/emqx_connection.erl @@ -8,7 +8,13 @@ %% - TCP/TLS connection %% - QUIC Stream %% -%% for WebSocket @see emqx_ws_connection.erl +%% For WebSocket transport, @see `emqx_ws_connection`. +%% For `esockd_socket` transport, @see `emqx_socket_connection`. + +%% NOTE +%% When changing this module, please make an effort to port changes to +%% `emqx_socket_connection` module if they make sense there, and vice +%% versa. -module(emqx_connection). -include("emqx.hrl"). 
From 8cf1ee070afee4e793ef5a40c2ddfdefa13cb69d Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 22 Jul 2025 14:28:37 +0200 Subject: [PATCH 28/31] chore(schema): mention `tcp_backend` change requires listener restart --- rel/i18n/emqx_schema.hocon | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rel/i18n/emqx_schema.hocon b/rel/i18n/emqx_schema.hocon index f6d3e44b1be..3e672f7e462 100644 --- a/rel/i18n/emqx_schema.hocon +++ b/rel/i18n/emqx_schema.hocon @@ -619,7 +619,9 @@ fields_mqtt_opts_tcp_backend.desc: - `gen_tcp`: Standard backend, in use since EMQX 5.0 release. - `socket`: Experimental backend, looking to improve message latency and compute resource usage. - Note that some `tcp_options` settings will have no effect when using this backend, e.g.: `high_watermark` and `send_timeout_close`.""" + Note that some `tcp_options` settings will have no effect when using this backend, e.g.: `high_watermark` and `send_timeout_close`. + +Changing the backend requires restarting the listener, which will terminate all its active connections.""" fields_mqtt_opts_tcp_backend.label: """TCP Backend""" From b255dddbf361e493a6926d0312c3bf764ee223f4 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 22 Jul 2025 19:24:51 +0200 Subject: [PATCH 29/31] fix(schema): do not announce `socket` TCP backend under Windows --- apps/emqx/src/emqx_schema.erl | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/apps/emqx/src/emqx_schema.erl b/apps/emqx/src/emqx_schema.erl index 4c5acc0f251..2166464ada2 100644 --- a/apps/emqx/src/emqx_schema.erl +++ b/apps/emqx/src/emqx_schema.erl @@ -629,11 +629,17 @@ fields("mqtt_tcp_listener") -> mqtt_listener(1883) ++ [ {"tcp_backend", - sc(hoconsc:enum([gen_tcp, socket]), #{ - default => <<"gen_tcp">>, - desc => ?DESC(fields_mqtt_opts_tcp_backend), - importance => ?IMPORTANCE_LOW - })} + sc( + case os:type() of + {unix, _} -> hoconsc:enum([gen_tcp, socket]); + {win32, _} -> hoconsc:enum([gen_tcp])
+ end, + #{ + default => <<"gen_tcp">>, + desc => ?DESC(fields_mqtt_opts_tcp_backend), + importance => ?IMPORTANCE_LOW + } + )} ] ++ mqtt_parse_options() ++ [ From 367fd4b404fac900912d94b24913012e6a138842 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Tue, 22 Jul 2025 20:18:41 +0200 Subject: [PATCH 30/31] chore(socket): address dialyzer concerns --- apps/emqx/src/emqx_socket_connection.erl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/apps/emqx/src/emqx_socket_connection.erl b/apps/emqx/src/emqx_socket_connection.erl index 64d518f8068..67fc1f9d04d 100644 --- a/apps/emqx/src/emqx_socket_connection.erl +++ b/apps/emqx/src/emqx_socket_connection.erl @@ -511,9 +511,7 @@ process_msg(Msg, State) -> {ok, NextMsg, NState} -> process_msg(NextMsg, NState); {stop, Reason, NState} -> - {stop, Reason, NState}; - {stop, Reason} -> - {stop, Reason, State} + {stop, Reason, NState} catch exit:normal -> {stop, normal, State}; @@ -560,7 +558,7 @@ handle_msg({'$socket', _Socket, abort, {_Handle, Reason}}, State = #state{sockst handle_info({sock_error, Reason}, State); false -> %% In case there were more than 1 outstanding select: - {ok, State} + ok end; handle_msg({recv, Data}, State) -> handle_data(Data, false, State); From 80429fac902ae8a0254699632d85f788bec0b0e2 Mon Sep 17 00:00:00 2001 From: Andrew Mayorov Date: Wed, 23 Jul 2025 11:17:15 +0200 Subject: [PATCH 31/31] test(telemetry): fix testcase timeout due to dirty server state --- apps/emqx_telemetry/test/emqx_telemetry_SUITE.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/emqx_telemetry/test/emqx_telemetry_SUITE.erl b/apps/emqx_telemetry/test/emqx_telemetry_SUITE.erl index c722b2fa2cc..d8c1ead541a 100644 --- a/apps/emqx_telemetry/test/emqx_telemetry_SUITE.erl +++ b/apps/emqx_telemetry/test/emqx_telemetry_SUITE.erl @@ -219,6 +219,7 @@ t_node_uuid(_) -> {ok, NodeUUID4} = emqx_telemetry_proto_v1:get_node_uuid(node()), ?assertEqual(NodeUUID2, NodeUUID3), ?assertEqual(NodeUUID3, NodeUUID4), 
+ emqx_telemetry:stop_reporting(), ?assertMatch({badrpc, nodedown}, emqx_telemetry_proto_v1:get_node_uuid('fake@node')). t_cluster_uuid(Config) ->