Message ID | 20240301230542.116823-3-kuba@kernel.org (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | tools: ynl: add --dbg-small-recv for easier kernel testing | expand |
Jakub Kicinski <kuba@kernel.org> writes: > Make the size of the buffer we use for recv() configurable. > The details of the buffer sizing in netlink are somewhat > arcane, we could spend a lot of time polishing this API. > Let's just leave some hopefully helpful comments for now. > This is a for-developers-only feature, anyway. > > Signed-off-by: Jakub Kicinski <kuba@kernel.org> > --- > tools/net/ynl/lib/ynl.py | 21 ++++++++++++++++++--- > 1 file changed, 18 insertions(+), 3 deletions(-) > > diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py > index 92ade9105f31..bc5a526dbb99 100644 > --- a/tools/net/ynl/lib/ynl.py > +++ b/tools/net/ynl/lib/ynl.py > @@ -84,6 +84,10 @@ from .nlspec import SpecFamily > return f"Netlink error: {os.strerror(-self.nl_msg.error)}\n{self.nl_msg}" > > > +class ConfigError(Exception): > + pass > + > + > class NlAttr: > ScalarFormat = namedtuple('ScalarFormat', ['native', 'big', 'little']) > type_formats = { > @@ -400,7 +404,8 @@ genl_family_name_to_id = None > > > class YnlFamily(SpecFamily): > - def __init__(self, def_path, schema=None, process_unknown=False): > + def __init__(self, def_path, schema=None, process_unknown=False, > + recv_size=131072): > super().__init__(def_path, schema) > > self.include_raw = False > @@ -423,6 +428,16 @@ genl_family_name_to_id = None > self.async_msg_ids = set() > self.async_msg_queue = [] > > + # Note that netlink will use conservative (min) message size for > + # the first dump recv() on the socket, our setting will only matter > + # from the second recv() on. > + self._recv_size = recv_size > + # Netlink will always allocate at least PAGE_SIZE - sizeof(skb_shinfo) > + # for a message, so smaller receive sizes will lead to truncation. > + # Note that the min size for other families may be larger than 4k! > + if self._recv_size < 4000: > + raise ConfigError() Nit: You've added this between the declaration of async_msg_ids and where it gets populated. Otherwise LGTM. > + > for msg in self.msgs.values(): > if msg.is_async: > self.async_msg_ids.add(msg.rsp_value) > @@ -799,7 +814,7 @@ genl_family_name_to_id = None > def check_ntf(self): > while True: > try: > - reply = self.sock.recv(128 * 1024, socket.MSG_DONTWAIT) > + reply = self.sock.recv(self._recv_size, socket.MSG_DONTWAIT) > except BlockingIOError: > return > > @@ -854,7 +869,7 @@ genl_family_name_to_id = None > done = False > rsp = [] > while not done: > - reply = self.sock.recv(128 * 1024) > + reply = self.sock.recv(self._recv_size) > nms = NlMsgs(reply, attr_space=op.attr_set) > for nl_msg in nms: > if nl_msg.extack:
On Fri, 1 Mar 2024 at 23:05, Jakub Kicinski <kuba@kernel.org> wrote: > > class YnlFamily(SpecFamily): > - def __init__(self, def_path, schema=None, process_unknown=False): > + def __init__(self, def_path, schema=None, process_unknown=False, > + recv_size=131072): An aside: what is the reason for choosing a 128k receive buffer? If I remember correctly, netlink messages are capped at 32k. > super().__init__(def_path, schema) > > self.include_raw = False > @@ -423,6 +428,16 @@ genl_family_name_to_id = None > self.async_msg_ids = set() > self.async_msg_queue = [] > > + # Note that netlink will use conservative (min) message size for > + # the first dump recv() on the socket, our setting will only matter I'm curious, why does it behave like this? > + # from the second recv() on. > + self._recv_size = recv_size
On Mon, 4 Mar 2024 13:38:51 +0000 Donald Hunter wrote: > > class YnlFamily(SpecFamily): > > - def __init__(self, def_path, schema=None, process_unknown=False): > > + def __init__(self, def_path, schema=None, process_unknown=False, > > + recv_size=131072): > > An aside: what is the reason for choosing a 128k receive buffer? If I > remember correctly, netlink messages are capped at 32k. Attributes, not messages, right? But large messages are relatively rare, this is to make dump use fewer syscalls. Dump can give us multiple message on each recv(). > > super().__init__(def_path, schema) > > > > self.include_raw = False > > @@ -423,6 +428,16 @@ genl_family_name_to_id = None > > self.async_msg_ids = set() > > self.async_msg_queue = [] > > > > + # Note that netlink will use conservative (min) message size for > > + # the first dump recv() on the socket, our setting will only matter > > I'm curious, why does it behave like this? Dump is initiated inside a send() system call, so that we can validate arguments and return any init errors directly. That means we don't know what buf size will be used by subsequent recv()s when we produce the first message :(
On Mon, 4 Mar 2024 at 14:58, Jakub Kicinski <kuba@kernel.org> wrote: > > On Mon, 4 Mar 2024 13:38:51 +0000 Donald Hunter wrote: > > > class YnlFamily(SpecFamily): > > > - def __init__(self, def_path, schema=None, process_unknown=False): > > > + def __init__(self, def_path, schema=None, process_unknown=False, > > > + recv_size=131072): > > > > An aside: what is the reason for choosing a 128k receive buffer? If I > > remember correctly, netlink messages are capped at 32k. > > Attributes, not messages, right? But large messages are relatively > rare, this is to make dump use fewer syscalls. Dump can give us multiple > message on each recv(). I did mean messages: https://elixir.bootlin.com/linux/latest/source/net/netlink/af_netlink.c#L1958 For rt_link I see ~21 messages per recv(): ./tools/net/ynl/cli.py \ --spec Documentation/netlink/specs/rt_link.yaml \ --dump getlink --dbg-small-recv 131072 > /dev/null Recv: read 3260 bytes, 2 messages nl_len = 1432 (1416) nl_flags = 0x2 nl_type = 16 nl_len = 1828 (1812) nl_flags = 0x2 nl_type = 16 Recv: read 31180 bytes, 21 messages ... Recv: read 31712 bytes, 22 messages ... > > > super().__init__(def_path, schema) > > > > > > self.include_raw = False > > > @@ -423,6 +428,16 @@ genl_family_name_to_id = None > > > self.async_msg_ids = set() > > > self.async_msg_queue = [] > > > > > > + # Note that netlink will use conservative (min) message size for > > > + # the first dump recv() on the socket, our setting will only matter > > > > I'm curious, why does it behave like this? > > Dump is initiated inside a send() system call, so that we can > validate arguments and return any init errors directly. > That means we don't know what buf size will be used by subsequent > recv()s when we produce the first message :( Ah, makes sense.
On Mon, 4 Mar 2024 15:57:57 +0000 Donald Hunter wrote: > > Attributes, not messages, right? But large messages are relatively > > rare, this is to make dump use fewer syscalls. Dump can give us multiple > > message on each recv(). > > I did mean messages: That's kernel capping the allocations to 32kB, using larger allocations is "costly" / likely to fail on production systems. Technically the family can overrule that setting, and my recollection was that 128k or 64k was what iproute2 uses.. I could be wrong.
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 92ade9105f31..bc5a526dbb99 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -84,6 +84,10 @@ from .nlspec import SpecFamily return f"Netlink error: {os.strerror(-self.nl_msg.error)}\n{self.nl_msg}" +class ConfigError(Exception): + pass + + class NlAttr: ScalarFormat = namedtuple('ScalarFormat', ['native', 'big', 'little']) type_formats = { @@ -400,7 +404,8 @@ genl_family_name_to_id = None class YnlFamily(SpecFamily): - def __init__(self, def_path, schema=None, process_unknown=False): + def __init__(self, def_path, schema=None, process_unknown=False, + recv_size=131072): super().__init__(def_path, schema) self.include_raw = False @@ -423,6 +428,16 @@ genl_family_name_to_id = None self.async_msg_ids = set() self.async_msg_queue = [] + # Note that netlink will use conservative (min) message size for + # the first dump recv() on the socket, our setting will only matter + # from the second recv() on. + self._recv_size = recv_size + # Netlink will always allocate at least PAGE_SIZE - sizeof(skb_shinfo) + # for a message, so smaller receive sizes will lead to truncation. + # Note that the min size for other families may be larger than 4k! + if self._recv_size < 4000: + raise ConfigError() + for msg in self.msgs.values(): if msg.is_async: self.async_msg_ids.add(msg.rsp_value) @@ -799,7 +814,7 @@ genl_family_name_to_id = None def check_ntf(self): while True: try: - reply = self.sock.recv(128 * 1024, socket.MSG_DONTWAIT) + reply = self.sock.recv(self._recv_size, socket.MSG_DONTWAIT) except BlockingIOError: return @@ -854,7 +869,7 @@ genl_family_name_to_id = None done = False rsp = [] while not done: - reply = self.sock.recv(128 * 1024) + reply = self.sock.recv(self._recv_size) nms = NlMsgs(reply, attr_space=op.attr_set) for nl_msg in nms: if nl_msg.extack:
Make the size of the buffer we use for recv() configurable. The details of the buffer sizing in netlink are somewhat arcane, we could spend a lot of time polishing this API. Let's just leave some hopefully helpful comments for now. This is a for-developers-only feature, anyway. Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- tools/net/ynl/lib/ynl.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-)