diff mbox series

[net-next,2/4] tools: ynl: allow setting recv() size

Message ID 20240301230542.116823-3-kuba@kernel.org (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series tools: ynl: add --dbg-small-recv for easier kernel testing | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 5 of 5 maintainers
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 51 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-03-04--12-00 (tests: 889)

Commit Message

Jakub Kicinski March 1, 2024, 11:05 p.m. UTC
Make the size of the buffer we use for recv() configurable.
The details of the buffer sizing in netlink are somewhat
arcane, we could spend a lot of time polishing this API.
Let's just leave some hopefully helpful comments for now.
This is a for-developers-only feature, anyway.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/net/ynl/lib/ynl.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

Comments

Donald Hunter March 4, 2024, 11:38 a.m. UTC | #1
Jakub Kicinski <kuba@kernel.org> writes:

> Make the size of the buffer we use for recv() configurable.
> The details of the buffer sizing in netlink are somewhat
> arcane, we could spend a lot of time polishing this API.
> Let's just leave some hopefully helpful comments for now.
> This is a for-developers-only feature, anyway.
>
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> ---
>  tools/net/ynl/lib/ynl.py | 21 ++++++++++++++++++---
>  1 file changed, 18 insertions(+), 3 deletions(-)
>
> diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
> index 92ade9105f31..bc5a526dbb99 100644
> --- a/tools/net/ynl/lib/ynl.py
> +++ b/tools/net/ynl/lib/ynl.py
> @@ -84,6 +84,10 @@ from .nlspec import SpecFamily
>      return f"Netlink error: {os.strerror(-self.nl_msg.error)}\n{self.nl_msg}"
>  
>  
> +class ConfigError(Exception):
> +    pass
> +
> +
>  class NlAttr:
>      ScalarFormat = namedtuple('ScalarFormat', ['native', 'big', 'little'])
>      type_formats = {
> @@ -400,7 +404,8 @@ genl_family_name_to_id = None
>  
>  
>  class YnlFamily(SpecFamily):
> -    def __init__(self, def_path, schema=None, process_unknown=False):
> +    def __init__(self, def_path, schema=None, process_unknown=False,
> +                 recv_size=131072):
>          super().__init__(def_path, schema)
>  
>          self.include_raw = False
> @@ -423,6 +428,16 @@ genl_family_name_to_id = None
>          self.async_msg_ids = set()
>          self.async_msg_queue = []
>  
> +        # Note that netlink will use conservative (min) message size for
> +        # the first dump recv() on the socket, our setting will only matter
> +        # from the second recv() on.
> +        self._recv_size = recv_size
> +        # Netlink will always allocate at least PAGE_SIZE - sizeof(skb_shinfo)
> +        # for a message, so smaller receive sizes will lead to truncation.
> +        # Note that the min size for other families may be larger than 4k!
> +        if self._recv_size < 4000:
> +            raise ConfigError()

Nit: You've added this between the declaration of async_msg_ids and
where it gets populated. Otherwise LGTM.

> +
>          for msg in self.msgs.values():
>              if msg.is_async:
>                  self.async_msg_ids.add(msg.rsp_value)
> @@ -799,7 +814,7 @@ genl_family_name_to_id = None
>      def check_ntf(self):
>          while True:
>              try:
> -                reply = self.sock.recv(128 * 1024, socket.MSG_DONTWAIT)
> +                reply = self.sock.recv(self._recv_size, socket.MSG_DONTWAIT)
>              except BlockingIOError:
>                  return
>  
> @@ -854,7 +869,7 @@ genl_family_name_to_id = None
>          done = False
>          rsp = []
>          while not done:
> -            reply = self.sock.recv(128 * 1024)
> +            reply = self.sock.recv(self._recv_size)
>              nms = NlMsgs(reply, attr_space=op.attr_set)
>              for nl_msg in nms:
>                  if nl_msg.extack:
Donald Hunter March 4, 2024, 1:38 p.m. UTC | #2
On Fri, 1 Mar 2024 at 23:05, Jakub Kicinski <kuba@kernel.org> wrote:
>
>  class YnlFamily(SpecFamily):
> -    def __init__(self, def_path, schema=None, process_unknown=False):
> +    def __init__(self, def_path, schema=None, process_unknown=False,
> +                 recv_size=131072):

An aside: what is the reason for choosing a 128k receive buffer? If I
remember correctly, netlink messages are capped at 32k.

>          super().__init__(def_path, schema)
>
>          self.include_raw = False
> @@ -423,6 +428,16 @@ genl_family_name_to_id = None
>          self.async_msg_ids = set()
>          self.async_msg_queue = []
>
> +        # Note that netlink will use conservative (min) message size for
> +        # the first dump recv() on the socket, our setting will only matter

I'm curious, why does it behave like this?

> +        # from the second recv() on.
> +        self._recv_size = recv_size
Jakub Kicinski March 4, 2024, 2:58 p.m. UTC | #3
On Mon, 4 Mar 2024 13:38:51 +0000 Donald Hunter wrote:
> >  class YnlFamily(SpecFamily):
> > -    def __init__(self, def_path, schema=None, process_unknown=False):
> > +    def __init__(self, def_path, schema=None, process_unknown=False,
> > +                 recv_size=131072):  
> 
> An aside: what is the reason for choosing a 128k receive buffer? If I
> remember correctly, netlink messages are capped at 32k.

Attributes, not messages, right? But large messages are relatively
rare, this is to make dump use fewer syscalls. Dump can give us multiple
message on each recv().

> >          super().__init__(def_path, schema)
> >
> >          self.include_raw = False
> > @@ -423,6 +428,16 @@ genl_family_name_to_id = None
> >          self.async_msg_ids = set()
> >          self.async_msg_queue = []
> >
> > +        # Note that netlink will use conservative (min) message size for
> > +        # the first dump recv() on the socket, our setting will only matter  
> 
> I'm curious, why does it behave like this?

Dump is initiated inside a send() system call, so that we can
validate arguments and return any init errors directly.
That means we don't know what buf size will be used by subsequent
recv()s when we produce the first message :(
Donald Hunter March 4, 2024, 3:57 p.m. UTC | #4
On Mon, 4 Mar 2024 at 14:58, Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Mon, 4 Mar 2024 13:38:51 +0000 Donald Hunter wrote:
> > >  class YnlFamily(SpecFamily):
> > > -    def __init__(self, def_path, schema=None, process_unknown=False):
> > > +    def __init__(self, def_path, schema=None, process_unknown=False,
> > > +                 recv_size=131072):
> >
> > An aside: what is the reason for choosing a 128k receive buffer? If I
> > remember correctly, netlink messages are capped at 32k.
>
> Attributes, not messages, right? But large messages are relatively
> rare, this is to make dump use fewer syscalls. Dump can give us multiple
> message on each recv().

I did mean messages:

https://elixir.bootlin.com/linux/latest/source/net/netlink/af_netlink.c#L1958

For rt_link I see ~21 messages per recv():

./tools/net/ynl/cli.py \
    --spec Documentation/netlink/specs/rt_link.yaml \
    --dump getlink --dbg-small-recv 131072 > /dev/null
Recv: read 3260 bytes, 2 messages
   nl_len = 1432 (1416) nl_flags = 0x2 nl_type = 16
   nl_len = 1828 (1812) nl_flags = 0x2 nl_type = 16
Recv: read 31180 bytes, 21 messages
...
Recv: read 31712 bytes, 22 messages
...

> > >          super().__init__(def_path, schema)
> > >
> > >          self.include_raw = False
> > > @@ -423,6 +428,16 @@ genl_family_name_to_id = None
> > >          self.async_msg_ids = set()
> > >          self.async_msg_queue = []
> > >
> > > +        # Note that netlink will use conservative (min) message size for
> > > +        # the first dump recv() on the socket, our setting will only matter
> >
> > I'm curious, why does it behave like this?
>
> Dump is initiated inside a send() system call, so that we can
> validate arguments and return any init errors directly.
> That means we don't know what buf size will be used by subsequent
> recv()s when we produce the first message :(

Ah, makes sense.
Jakub Kicinski March 4, 2024, 4:20 p.m. UTC | #5
On Mon, 4 Mar 2024 15:57:57 +0000 Donald Hunter wrote:
> > Attributes, not messages, right? But large messages are relatively
> > rare, this is to make dump use fewer syscalls. Dump can give us multiple
> > message on each recv().  
> 
> I did mean messages:

That's kernel capping the allocations to 32kB, using larger allocations
is "costly" / likely to fail on production systems. Technically the
family can overrule that setting, and my recollection was that 128k or
64k was what iproute2 uses.. I could be wrong.
diff mbox series

Patch

diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index 92ade9105f31..bc5a526dbb99 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -84,6 +84,10 @@  from .nlspec import SpecFamily
     return f"Netlink error: {os.strerror(-self.nl_msg.error)}\n{self.nl_msg}"
 
 
+class ConfigError(Exception):
+    pass
+
+
 class NlAttr:
     ScalarFormat = namedtuple('ScalarFormat', ['native', 'big', 'little'])
     type_formats = {
@@ -400,7 +404,8 @@  genl_family_name_to_id = None
 
 
 class YnlFamily(SpecFamily):
-    def __init__(self, def_path, schema=None, process_unknown=False):
+    def __init__(self, def_path, schema=None, process_unknown=False,
+                 recv_size=131072):
         super().__init__(def_path, schema)
 
         self.include_raw = False
@@ -423,6 +428,16 @@  genl_family_name_to_id = None
         self.async_msg_ids = set()
         self.async_msg_queue = []
 
+        # Note that netlink will use conservative (min) message size for
+        # the first dump recv() on the socket, our setting will only matter
+        # from the second recv() on.
+        self._recv_size = recv_size
+        # Netlink will always allocate at least PAGE_SIZE - sizeof(skb_shinfo)
+        # for a message, so smaller receive sizes will lead to truncation.
+        # Note that the min size for other families may be larger than 4k!
+        if self._recv_size < 4000:
+            raise ConfigError()
+
         for msg in self.msgs.values():
             if msg.is_async:
                 self.async_msg_ids.add(msg.rsp_value)
@@ -799,7 +814,7 @@  genl_family_name_to_id = None
     def check_ntf(self):
         while True:
             try:
-                reply = self.sock.recv(128 * 1024, socket.MSG_DONTWAIT)
+                reply = self.sock.recv(self._recv_size, socket.MSG_DONTWAIT)
             except BlockingIOError:
                 return
 
@@ -854,7 +869,7 @@  genl_family_name_to_id = None
         done = False
         rsp = []
         while not done:
-            reply = self.sock.recv(128 * 1024)
+            reply = self.sock.recv(self._recv_size)
             nms = NlMsgs(reply, attr_space=op.attr_set)
             for nl_msg in nms:
                 if nl_msg.extack: