diff mbox series

[v5,10/11] net/9p: add p9_msg_buf_size()

Message ID 2ade510b2e67a30c1064bcd7a8b6c73e6777b9ed.1657636554.git.linux_oss@crudebyte.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series remove msize limit in virtio transport | expand

Checks

Context Check Description
netdev/tree_selection success Guessed tree name to be net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix warning Target tree name not specified in the subject
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 4 this patch: 4
netdev/cc_maintainers warning 4 maintainers not CCed: pabeni@redhat.com kuba@kernel.org davem@davemloft.net edumazet@google.com
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 4 this patch: 4
netdev/checkpatch warning CHECK: Alignment should match open parenthesis CHECK: Blank lines aren't necessary after an open brace '{' CHECK: Blank lines aren't necessary before a close brace '}' CHECK: Prefer kernel type 's32' over 'int32_t' CHECK: Unnecessary parentheses around 'c->proto_version != p9_proto_2000L' CHECK: Unnecessary parentheses around 'c->proto_version != p9_proto_2000u' WARNING: Avoid crashing the kernel - try using WARN_ON & recovery code rather than BUG() or BUG_ON() WARNING: Missing a blank line after declarations WARNING: braces {} are not necessary for single statement blocks WARNING: line length of 83 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Christian Schoenebeck July 12, 2022, 2:31 p.m. UTC
This new function calculates a buffer size suitable for holding the
intended 9p request or response. For rather small message types (which
applies to almost all 9p message types actually) simply use hard coded
values. For some variable-length and potentially large message types
calculate a more precise value according to what data is actually
transmitted to avoid unnecessarily huge buffers.

Signed-off-by: Christian Schoenebeck <linux_oss@crudebyte.com>
---
 net/9p/protocol.c | 154 ++++++++++++++++++++++++++++++++++++++++++++++
 net/9p/protocol.h |   2 +
 2 files changed, 156 insertions(+)

Comments

Dominique Martinet July 13, 2022, 10:29 a.m. UTC | #1
Christian Schoenebeck wrote on Tue, Jul 12, 2022 at 04:31:33PM +0200:
> This new function calculates a buffer size suitable for holding the
> intended 9p request or response. For rather small message types (which
> applies to almost all 9p message types actually) simply use hard coded
> values. For some variable-length and potentially large message types
> calculate a more precise value according to what data is actually
> transmitted to avoid unnecessarily huge buffers.
> 
> Signed-off-by: Christian Schoenebeck <linux_oss@crudebyte.com>

Overally already had checked a few times but I just went through
client.c va argument lists as well this time, just a few nitpicks.


> ---
>  net/9p/protocol.c | 154 ++++++++++++++++++++++++++++++++++++++++++++++
>  net/9p/protocol.h |   2 +
>  2 files changed, 156 insertions(+)
> 
> diff --git a/net/9p/protocol.c b/net/9p/protocol.c
> index 3754c33e2974..49939e8cde2a 100644
> --- a/net/9p/protocol.c
> +++ b/net/9p/protocol.c
> @@ -23,6 +23,160 @@
>  
>  #include <trace/events/9p.h>
>  
> +/* len[2] text[len] */
> +#define P9_STRLEN(s) \
> +	(2 + min_t(size_t, s ? strlen(s) : 0, USHRT_MAX))
> +
> +/**
> + * p9_msg_buf_size - Returns a buffer size sufficiently large to hold the
> + * intended 9p message.
> + * @c: client
> + * @type: message type
> + * @fmt: format template for assembling request message
> + * (see p9pdu_vwritef)
> + * @ap: variable arguments to be fed to passed format template
> + * (see p9pdu_vwritef)
> + *
> + * Note: Even for response types (P9_R*) the format template and variable
> + * arguments must always be for the originating request type (P9_T*).
> + */
> +size_t p9_msg_buf_size(struct p9_client *c, enum p9_msg_t type,
> +			const char *fmt, va_list ap)
> +{
> +	/* size[4] type[1] tag[2] */
> +	const int hdr = 4 + 1 + 2;
> +	/* ename[s] errno[4] */
> +	const int rerror_size = hdr + P9_ERRMAX + 4;
> +	/* ecode[4] */
> +	const int rlerror_size = hdr + 4;
> +	const int err_size =
> +		c->proto_version == p9_proto_2000L ? rlerror_size : rerror_size;
> +
> +	switch (type) {
> +
> +	/* message types not used at all */
> +	case P9_TERROR:
> +	case P9_TLERROR:
> +	case P9_TAUTH:
> +	case P9_RAUTH:
> +		BUG();
> +
> +	/* variable length & potentially large message types */
> +	case P9_TATTACH:
> +		BUG_ON(strcmp("ddss?u", fmt));
> +		va_arg(ap, int32_t);
> +		va_arg(ap, int32_t);
> +		{
> +			const char *uname = va_arg(ap, const char *);
> +			const char *aname = va_arg(ap, const char *);
> +			/* fid[4] afid[4] uname[s] aname[s] n_uname[4] */
> +			return hdr + 4 + 4 + P9_STRLEN(uname) + P9_STRLEN(aname) + 4;
> +		}
> +	case P9_TWALK:
> +		BUG_ON(strcmp("ddT", fmt));
> +		va_arg(ap, int32_t);
> +		va_arg(ap, int32_t);
> +		{
> +			uint i, nwname = max(va_arg(ap, int), 0);

I was about to say that the max is useless as for loop would be cut
short, but these are unsigned... So the code in protocol.c p9pdu_vwritef
'T' has a bug (int cast directly to uint16): do you want to fix it or
shall I go ahead?

> +			size_t wname_all;
> +			const char **wnames = va_arg(ap, const char **);
> +			for (i = 0, wname_all = 0; i < nwname; ++i) {
> +				wname_all += P9_STRLEN(wnames[i]);
> +			}
> +			/* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */
> +			return hdr + 4 + 4 + 2 + wname_all;
> +		}
> +	case P9_RWALK:
> +		BUG_ON(strcmp("ddT", fmt));
> +		va_arg(ap, int32_t);
> +		va_arg(ap, int32_t);
> +		{
> +			uint nwname = va_arg(ap, int);
> +			/* nwqid[2] nwqid*(wqid[13]) */
> +			return max_t(size_t, hdr + 2 + nwname * 13, err_size);
> +		}
> +	case P9_TCREATE:
> +		BUG_ON(strcmp("dsdb?s", fmt));
> +		va_arg(ap, int32_t);
> +		{
> +			const char *name = va_arg(ap, const char *);
> +			if ((c->proto_version != p9_proto_2000u) &&
> +			    (c->proto_version != p9_proto_2000L))

(I don't think 9p2000.L can call TCREATE, but it doesn't really hurt
either)

> +				/* fid[4] name[s] perm[4] mode[1] */
> +				return hdr + 4 + P9_STRLEN(name) + 4 + 1;
> +			{
> +				va_arg(ap, int32_t);
> +				va_arg(ap, int);
> +				{
> +					const char *ext = va_arg(ap, const char *);
> +					/* fid[4] name[s] perm[4] mode[1] extension[s] */
> +					return hdr + 4 + P9_STRLEN(name) + 4 + 1 + P9_STRLEN(ext);
> +				}
> +			}
> +		}
> +	case P9_TLCREATE:
> +		BUG_ON(strcmp("dsddg", fmt));
> +		va_arg(ap, int32_t);
> +		{
> +			const char *name = va_arg(ap, const char *);
> +			/* fid[4] name[s] flags[4] mode[4] gid[4] */
> +			return hdr + 4 + P9_STRLEN(name) + 4 + 4 + 4;
> +		}
> +	case P9_RREAD:
> +	case P9_RREADDIR:
> +		BUG_ON(strcmp("dqd", fmt));
> +		va_arg(ap, int32_t);
> +		va_arg(ap, int64_t);
> +		{
> +			const int32_t count = va_arg(ap, int32_t);
> +			/* count[4] data[count] */
> +			return max_t(size_t, hdr + 4 + count, err_size);
> +		}
> +	case P9_TWRITE:
> +		BUG_ON(strcmp("dqV", fmt));
> +		va_arg(ap, int32_t);
> +		va_arg(ap, int64_t);
> +		{
> +			const int32_t count = va_arg(ap, int32_t);
> +			/* fid[4] offset[8] count[4] data[count] */
> +			return hdr + 4 + 8 + 4 + count;
> +		}
> +	case P9_TRENAMEAT:

if we have trenameat we probably want trename, tunlinkat as well?
What's your criteria for counting individually vs slapping 8k at it?

In this particular case, oldname/newname are single component names
within a directory so this is capped at 2*(4+256), that could easily fit
in 4k without bothering.

> +		BUG_ON(strcmp("dsds", fmt));
> +		va_arg(ap, int32_t);
> +		{
> +			const char *oldname = va_arg(ap, const char *);
> +			va_arg(ap, int32_t);
> +			{
> +				const char *newname = va_arg(ap, const char *);

(style nitpick) I don't see the point of nesting another level of
indentation here, it feels cleaner to declare oldname/newname at the
start of the block and be done with it.
Doesn't really matter but it was a bit confusing with the if for TCREATE
earlier.

> +				/* olddirfid[4] oldname[s] newdirfid[4] newname[s] */
> +				return hdr + 4 + P9_STRLEN(oldname) + 4 + P9_STRLEN(newname);
> +			}
> +		}
> +	case P9_RERROR:
> +		return rerror_size;
> +	case P9_RLERROR:
> +		return rlerror_size;
> +
> +	/* small message types */

ditto: what's your criteria for 4k vs 8k?

> +	case P9_TSTAT:

this is just fid[4], so 4k is more than enough

> +	case P9_RSTAT:

also fixed size 4+4+8+8+8+8+8+8+4 -- fits in 4k.

> +	case P9_TSYMLINK:

that one has symlink target which can be arbitrarily long (filesystem
specific, 4k is the usual limit for linux but some filesystem I don't
know might handle more -- it might be worth going through the trouble of
going through it.

Ah, we can't support an arbitrary length as we won't know the size for
rreadlink before the reply comes, so we have to set some arbitrary
max. Okay for 8k.

> +	case P9_RREADLINK:

Ok as above.

> +	case P9_TXATTRWALK:

xattr names seem capped at 256, could fit 4k but ok for 8.

> +	case P9_TXATTRCREATE:

same, ok for either 4 or 8.

> +	case P9_TLINK:

name is component name inside directory so capped at 256, but ok.

> +	case P9_TMKDIR:

same

> +	case P9_TUNLINKAT:

same

> +		return 8 * 1024;
> +
> +	/* tiny message types */
> +	default:

I went through things we didn't list:
mknod has a name but it's also component within directory, so should be
consistent with mkdir/unlinkat
trename, same.

tlock contains client_id which comes from hostname.. I think that's
capped at 256 as well? so ok for 4k.

rest all looks ok to me.


--
Dominique


> +		return 4 * 1024;
> +
> +	}
> +}
> +
>  static int
>  p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
>  
> diff --git a/net/9p/protocol.h b/net/9p/protocol.h
> index 6d719c30331a..ad2283d1f96b 100644
> --- a/net/9p/protocol.h
> +++ b/net/9p/protocol.h
> @@ -8,6 +8,8 @@
>   *  Copyright (C) 2008 by IBM, Corp.
>   */
>  
> +size_t p9_msg_buf_size(struct p9_client *c, enum p9_msg_t type,
> +			const char *fmt, va_list ap);
>  int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
>  		  va_list ap);
>  int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
OA
Christian Schoenebeck July 13, 2022, 1:06 p.m. UTC | #2
On Mittwoch, 13. Juli 2022 12:29:31 CEST Dominique Martinet wrote:
> Christian Schoenebeck wrote on Tue, Jul 12, 2022 at 04:31:33PM +0200:
> > This new function calculates a buffer size suitable for holding the
> > intended 9p request or response. For rather small message types (which
> > applies to almost all 9p message types actually) simply use hard coded
> > values. For some variable-length and potentially large message types
> > calculate a more precise value according to what data is actually
> > transmitted to avoid unnecessarily huge buffers.
> > 
> > Signed-off-by: Christian Schoenebeck <linux_oss@crudebyte.com>
> 
> Overally already had checked a few times but I just went through
> client.c va argument lists as well this time, just a few nitpicks.
> 
> > ---
> > 
> >  net/9p/protocol.c | 154 ++++++++++++++++++++++++++++++++++++++++++++++
> >  net/9p/protocol.h |   2 +
> >  2 files changed, 156 insertions(+)
> > 
> > diff --git a/net/9p/protocol.c b/net/9p/protocol.c
> > index 3754c33e2974..49939e8cde2a 100644
> > --- a/net/9p/protocol.c
> > +++ b/net/9p/protocol.c
> > @@ -23,6 +23,160 @@
> > 
> >  #include <trace/events/9p.h>
> > 
> > +/* len[2] text[len] */
> > +#define P9_STRLEN(s) \
> > +	(2 + min_t(size_t, s ? strlen(s) : 0, USHRT_MAX))
> > +
> > +/**
> > + * p9_msg_buf_size - Returns a buffer size sufficiently large to hold the
> > + * intended 9p message.
> > + * @c: client
> > + * @type: message type
> > + * @fmt: format template for assembling request message
> > + * (see p9pdu_vwritef)
> > + * @ap: variable arguments to be fed to passed format template
> > + * (see p9pdu_vwritef)
> > + *
> > + * Note: Even for response types (P9_R*) the format template and variable
> > + * arguments must always be for the originating request type (P9_T*).
> > + */
> > +size_t p9_msg_buf_size(struct p9_client *c, enum p9_msg_t type,
> > +			const char *fmt, va_list ap)
> > +{
> > +	/* size[4] type[1] tag[2] */
> > +	const int hdr = 4 + 1 + 2;
> > +	/* ename[s] errno[4] */
> > +	const int rerror_size = hdr + P9_ERRMAX + 4;
> > +	/* ecode[4] */
> > +	const int rlerror_size = hdr + 4;
> > +	const int err_size =
> > +		c->proto_version == p9_proto_2000L ? rlerror_size : rerror_size;
> > +
> > +	switch (type) {
> > +
> > +	/* message types not used at all */
> > +	case P9_TERROR:
> > +	case P9_TLERROR:
> > +	case P9_TAUTH:
> > +	case P9_RAUTH:
> > +		BUG();
> > +
> > +	/* variable length & potentially large message types */
> > +	case P9_TATTACH:
> > +		BUG_ON(strcmp("ddss?u", fmt));
> > +		va_arg(ap, int32_t);
> > +		va_arg(ap, int32_t);
> > +		{
> > +			const char *uname = va_arg(ap, const char *);
> > +			const char *aname = va_arg(ap, const char *);
> > +			/* fid[4] afid[4] uname[s] aname[s] n_uname[4] */
> > +			return hdr + 4 + 4 + P9_STRLEN(uname) + P9_STRLEN(aname) + 4;
> > +		}
> > +	case P9_TWALK:
> > +		BUG_ON(strcmp("ddT", fmt));
> > +		va_arg(ap, int32_t);
> > +		va_arg(ap, int32_t);
> > +		{
> > +			uint i, nwname = max(va_arg(ap, int), 0);
> 
> I was about to say that the max is useless as for loop would be cut
> short, but these are unsigned... So the code in protocol.c p9pdu_vwritef
> 'T' has a bug (int cast directly to uint16): do you want to fix it or
> shall I go ahead?

I'd either send a separate patch today for fixing 'T', or if you want to handle it by yourself, then just go ahead.

> > +			size_t wname_all;
> > +			const char **wnames = va_arg(ap, const char **);
> > +			for (i = 0, wname_all = 0; i < nwname; ++i) {
> > +				wname_all += P9_STRLEN(wnames[i]);
> > +			}
> > +			/* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */
> > +			return hdr + 4 + 4 + 2 + wname_all;
> > +		}
> > +	case P9_RWALK:
> > +		BUG_ON(strcmp("ddT", fmt));
> > +		va_arg(ap, int32_t);
> > +		va_arg(ap, int32_t);
> > +		{
> > +			uint nwname = va_arg(ap, int);
> > +			/* nwqid[2] nwqid*(wqid[13]) */
> > +			return max_t(size_t, hdr + 2 + nwname * 13, err_size);
> > +		}
> > +	case P9_TCREATE:
> > +		BUG_ON(strcmp("dsdb?s", fmt));
> > +		va_arg(ap, int32_t);
> > +		{
> > +			const char *name = va_arg(ap, const char *);
> > +			if ((c->proto_version != p9_proto_2000u) &&
> > +			    (c->proto_version != p9_proto_2000L))
> 
> (I don't think 9p2000.L can call TCREATE, but it doesn't really hurt
> either)

Yes, Tcreate is only 9p2000 and 9p2000.u. Semantically this particular check here means "if proto == 9p.2000". I can't remember anymore why I came up with this inverted form here. I'll change it to "if (c->proto_version == p9_proto_legacy)".

> > +				/* fid[4] name[s] perm[4] mode[1] */
> > +				return hdr + 4 + P9_STRLEN(name) + 4 + 1;
> > +			{
> > +				va_arg(ap, int32_t);
> > +				va_arg(ap, int);
> > +				{
> > +					const char *ext = va_arg(ap, const char *);
> > +					/* fid[4] name[s] perm[4] mode[1] extension[s] */
> > +					return hdr + 4 + P9_STRLEN(name) + 4 + 1 + P9_STRLEN(ext);
> > +				}
> > +			}
> > +		}
> > +	case P9_TLCREATE:
> > +		BUG_ON(strcmp("dsddg", fmt));
> > +		va_arg(ap, int32_t);
> > +		{
> > +			const char *name = va_arg(ap, const char *);
> > +			/* fid[4] name[s] flags[4] mode[4] gid[4] */
> > +			return hdr + 4 + P9_STRLEN(name) + 4 + 4 + 4;
> > +		}
> > +	case P9_RREAD:
> > +	case P9_RREADDIR:
> > +		BUG_ON(strcmp("dqd", fmt));
> > +		va_arg(ap, int32_t);
> > +		va_arg(ap, int64_t);
> > +		{
> > +			const int32_t count = va_arg(ap, int32_t);
> > +			/* count[4] data[count] */
> > +			return max_t(size_t, hdr + 4 + count, err_size);
> > +		}
> > +	case P9_TWRITE:
> > +		BUG_ON(strcmp("dqV", fmt));
> > +		va_arg(ap, int32_t);
> > +		va_arg(ap, int64_t);
> > +		{
> > +			const int32_t count = va_arg(ap, int32_t);
> > +			/* fid[4] offset[8] count[4] data[count] */
> > +			return hdr + 4 + 8 + 4 + count;
> > +		}
> 
> > +	case P9_TRENAMEAT:
> if we have trenameat we probably want trename, tunlinkat as well?
> What's your criteria for counting individually vs slapping 8k at it?
> 
> In this particular case, oldname/newname are single component names
> within a directory so this is capped at 2*(4+256), that could easily fit
> in 4k without bothering.

I have not taken the Linux kernel's current filename limit NAME_MAX (255) as basis, in that case you would be right. Instead I looked up what the maximum filename length among file systems in general was, and saw that ReiserFS supports up to slightly below 4k? So I took 4k as basis for the calculation used here, and the intention was to make this code more future proof. Because revisiting this code later on always takes quite some time and always has this certain potential to miss out details.

However if you want this to be based on what the Linux kernel currently supports, then I can also adjust this code to 255 being the basis.

Independent of the decision; additionally it might make sense to add something like:

#if NAME_MAX > 255
# error p9_msg_buf_size() needs adjustments
#endif

> > +		BUG_ON(strcmp("dsds", fmt));
> > +		va_arg(ap, int32_t);
> > +		{
> > +			const char *oldname = va_arg(ap, const char *);
> > +			va_arg(ap, int32_t);
> > +			{
> > +				const char *newname = va_arg(ap, const char *);
> 
> (style nitpick) I don't see the point of nesting another level of
> indentation here, it feels cleaner to declare oldname/newname at the
> start of the block and be done with it.

Because  va_arg(ap, int32_t);  must remain between those two declarations, and I think either the compiler or style check script was barking at me. But I will recheck, if possible I will remove the additional block scope here.

> > +				/* olddirfid[4] oldname[s] newdirfid[4] newname[s] */
> > +				return hdr + 4 + P9_STRLEN(oldname) + 4 + P9_STRLEN(newname);
> > +			}
> > +		}
> > +	case P9_RERROR:
> > +		return rerror_size;
> > +	case P9_RLERROR:
> > +		return rlerror_size;
> > +
> > +	/* small message types */
> 
> ditto: what's your criteria for 4k vs 8k?

As above, 4k being the basis for directory entry names, plus PATH_MAX (4k) as basis for maximum path length.

However looking at it again, if NAME_MAX == 4k was assumed exactly, then Tsymlink would have the potential to exceed 8k, as it has name[s] and symtgt[s] plus the other fields.

> > +	case P9_TSTAT:
> this is just fid[4], so 4k is more than enough

I guess that was a typo and should have been Twstat instead?

> > +	case P9_RSTAT:
> also fixed size 4+4+8+8+8+8+8+8+4 -- fits in 4k.

Rstat contains stat[n] which in turn contains variable-length string fields (filename, owner name, group name)

> > +	case P9_TSYMLINK:
> that one has symlink target which can be arbitrarily long (filesystem
> specific, 4k is the usual limit for linux but some filesystem I don't
> know might handle more -- it might be worth going through the trouble of
> going through it.

Like mentioned above, if exactly NAME_MAX == 4k was assumed, then Tsymlink may even be >8k.

> Ah, we can't support an arbitrary length as we won't know the size for
> rreadlink before the reply comes, so we have to set some arbitrary
> max. Okay for 8k.
> 
> > +	case P9_RREADLINK:
> Ok as above.
> 
> > +	case P9_TXATTRWALK:
> xattr names seem capped at 256, could fit 4k but ok for 8.

Again current NAME_MAX (255) vs. assumed max. supported (4k) by filesystems.

> > +	case P9_TXATTRCREATE:
> same, ok for either 4 or 8.
> 
> > +	case P9_TLINK:
> name is component name inside directory so capped at 256, but ok.
> 
> > +	case P9_TMKDIR:
> same
> 
> > +	case P9_TUNLINKAT:
> same
> 
> > +		return 8 * 1024;
> > +
> > +	/* tiny message types */
> 
> > +	default:
> I went through things we didn't list:
> mknod has a name but it's also component within directory, so should be
> consistent with mkdir/unlinkat
> trename, same.

If 255 is assumed then it would be fine to not list here, otherwise with max. name 4k I should rather list it as 8k message.

> tlock contains client_id which comes from hostname.. I think that's
> capped at 256 as well? so ok for 4k.

Looks like that slipped through on my side completely. Again 255 being basis then yes it could be skipped, 4k would require being listed as 8k message then.

> rest all looks ok to me.

Thanks for the review! I know, that's really a dry patch to look at. :)

Best regards,
Christian Schoenebeck
Dominique Martinet July 13, 2022, 8:52 p.m. UTC | #3
Christian Schoenebeck wrote on Wed, Jul 13, 2022 at 03:06:01PM +0200:
> > > +	case P9_TWALK:
> > > +		BUG_ON(strcmp("ddT", fmt));
> > > +		va_arg(ap, int32_t);
> > > +		va_arg(ap, int32_t);
> > > +		{
> > > +			uint i, nwname = max(va_arg(ap, int), 0);
> > 
> > I was about to say that the max is useless as for loop would be cut
> > short, but these are unsigned... So the code in protocol.c p9pdu_vwritef
> > 'T' has a bug (int cast directly to uint16): do you want to fix it or
> > shall I go ahead?
> 
> I'd either send a separate patch today for fixing 'T', or if you want
> to handle it by yourself, then just go ahead.

I'd appreciate if you have time, doesn't make much difference though

> > > +	case P9_TCREATE:
> > > +		BUG_ON(strcmp("dsdb?s", fmt));
> > > +		va_arg(ap, int32_t);
> > > +		{
> > > +			const char *name = va_arg(ap, const char *);
> > > +			if ((c->proto_version != p9_proto_2000u) &&
> > > +			    (c->proto_version != p9_proto_2000L))
> > 
> > (I don't think 9p2000.L can call TCREATE, but it doesn't really hurt
> > either)
> 
> Yes, Tcreate is only 9p2000 and 9p2000.u. Semantically this particular
> check here means "if proto == 9p.2000". I can't remember anymore why I
> came up with this inverted form here. I'll change it to "if
> (c->proto_version == p9_proto_legacy)".

Sounds good.

> > > +	case P9_TRENAMEAT:
> > if we have trenameat we probably want trename, tunlinkat as well?
> > What's your criteria for counting individually vs slapping 8k at it?
> > 
> > In this particular case, oldname/newname are single component names
> > within a directory so this is capped at 2*(4+256), that could easily fit
> > in 4k without bothering.
> 
> I have not taken the Linux kernel's current filename limit NAME_MAX
> (255) as basis, in that case you would be right. Instead I looked up
> what the maximum filename length among file systems in general was,
> and saw that ReiserFS supports up to slightly below 4k? So I took 4k
> as basis for the calculation used here, and the intention was to make
> this code more future proof. Because revisiting this code later on
> always takes quite some time and always has this certain potential to
> miss out details.

hmm, that's pretty deeply engrained into the VFS but I guess it might
change eventually, yes.

I don't mind as long as we're consistent (cf. unlink/mkdir below), in
practice measuring doesn't cost much.

> Independent of the decision; additionally it might make sense to add
> something like:
> 
> #if NAME_MAX > 255
> # error p9_msg_buf_size() needs adjustments
> #endif

That's probably an understatement but I don't mind either way, it
doesn't hurt.


> > > +		BUG_ON(strcmp("dsds", fmt));
> > > +		va_arg(ap, int32_t);
> > > +		{
> > > +			const char *oldname = va_arg(ap, const char *);
> > > +			va_arg(ap, int32_t);
> > > +			{
> > > +				const char *newname = va_arg(ap, const char *);
> > 
> > (style nitpick) I don't see the point of nesting another level of
> > indentation here, it feels cleaner to declare oldname/newname at the
> > start of the block and be done with it.
> 
> Because  va_arg(ap, int32_t);  must remain between those two
> declarations, and I think either the compiler or style check script
> was barking at me. But I will recheck, if possible I will remove the
> additional block scope here.

Yes, I think it'd need to look like this:

	case foo:
		BUG_ON(...)
		va_arg(ap, int32_t);
		{
			const char *oldname = va_arg(ap, const char *);
			const char *newname;
			va_arg(ap, int32_t);
			newname = va_arg(ap, const_char *);
			...
		}
or
		{
			const char *oldname, *newname;
			oldname = va_arg(ap, const char *);
			va_arg(ap, int32_t)
			newname = va_arg(ap, const char *);
			...
		}
		
I guess the later is slightly easier on the eyes


> > > +	/* small message types */
> > 
> > ditto: what's your criteria for 4k vs 8k?
> 
> As above, 4k being the basis for directory entry names, plus PATH_MAX
> (4k) as basis for maximum path length.
> 
> However looking at it again, if NAME_MAX == 4k was assumed exactly,
> then Tsymlink would have the potential to exceed 8k, as it has name[s]
> and symtgt[s] plus the other fields.

yes.


> > > +	case P9_TSTAT:
> > this is just fid[4], so 4k is more than enough
> 
> I guess that was a typo and should have been Twstat instead?

Ah, had missed this because 9p2000.L's version of stat[n] is fixed size.
Sounds good.

> > > +	case P9_RSTAT:
> > also fixed size 4+4+8+8+8+8+8+8+4 -- fits in 4k.
> 
> Rstat contains stat[n] which in turn contains variable-length string
> fields (filename, owner name, group name)

Right, same mistake.

> 
> > > +	case P9_TSYMLINK:
> > that one has symlink target which can be arbitrarily long (filesystem
> > specific, 4k is the usual limit for linux but some filesystem I don't
> > know might handle more -- it might be worth going through the trouble of
> > going through it.
> 
> Like mentioned above, if exactly NAME_MAX == 4k was assumed, then
> Tsymlink may even be >8k.

And all the other remarks are 'yes if we assume bigger NAME_MAX' -- I'm
happy either way.


> > rest all looks ok to me.
> 
> Thanks for the review! I know, that's really a dry patch to look
> at. :)

Thanks for writing it in the first place ;)

--
Dominique
Christian Schoenebeck July 14, 2022, 1:14 p.m. UTC | #4
On Mittwoch, 13. Juli 2022 22:52:56 CEST Dominique Martinet wrote:
> Christian Schoenebeck wrote on Wed, Jul 13, 2022 at 03:06:01PM +0200:
> > > > +	case P9_TWALK:
> > > > +		BUG_ON(strcmp("ddT", fmt));
> > > > +		va_arg(ap, int32_t);
> > > > +		va_arg(ap, int32_t);
> > > > +		{
> > > > +			uint i, nwname = max(va_arg(ap, int), 0);
> > > 
> > > I was about to say that the max is useless as for loop would be cut
> > > short, but these are unsigned... So the code in protocol.c p9pdu_vwritef
> > > 'T' has a bug (int cast directly to uint16): do you want to fix it or
> > > shall I go ahead?
> > 
> > I'd either send a separate patch today for fixing 'T', or if you want
> > to handle it by yourself, then just go ahead.
> 
> I'd appreciate if you have time, doesn't make much difference though

Looking closer at this separate issue; there is probably nothing to fix. 'T'
(and 'R') in p9pdu_vwritef() pulls an 'int' argument from the stack. But the
actual variable is passed here:

struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
			      const unsigned char * const *wnames, int clone)
{
    ...
    req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid,
                        nwname, wnames);
    ...
}

So the variable being passed was already uint16_t, which made me re-aware why
this is working anyway: Because C and C++ have this nice language hack that
any variadic integer variable smaller than 'int' is automatically casted to
'int' before being passed.

I mean I could clamp the pulled argument like:

diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 3754c33e2974..5fd1e948c86a 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -441,7 +441,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
                        }
                        break;
                case 'T':{
-                               uint16_t nwname = va_arg(ap, int);
+                               uint16_t nwname = clamp_t(int, va_arg(ap, int), 0, USHRT_MAX);
                                const char **wnames = va_arg(ap, const char **);
 
                                errcode = p9pdu_writef(pdu, proto_version, "w",
@@ -462,7 +462,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
                        }
                        break;
                case 'R':{
-                               uint16_t nwqid = va_arg(ap, int);
+                               uint16_t nwqid = clamp_t(int, va_arg(ap, int), 0, USHRT_MAX);
                                struct p9_qid *wqids =
                                    va_arg(ap, struct p9_qid *);

But it's pretty much pointless overhead. Another option would be to change
va_arg(ap, int) -> va_arg(ap, uint16_t), just to make it more clear what was
pushed from the other side.

Which probably also means I can simply drop the max() call in this patch 10
here as well.

For the 'R' case: I haven't found the spot where this is actually used.

> > > > +	case P9_TCREATE:
> > > > +		BUG_ON(strcmp("dsdb?s", fmt));
> > > > +		va_arg(ap, int32_t);
> > > > +		{
> > > > +			const char *name = va_arg(ap, const char *);
> > > > +			if ((c->proto_version != p9_proto_2000u) &&
> > > > +			    (c->proto_version != p9_proto_2000L))
> > > 
> > > (I don't think 9p2000.L can call TCREATE, but it doesn't really hurt
> > > either)
> > 
> > Yes, Tcreate is only 9p2000 and 9p2000.u. Semantically this particular
> > check here means "if proto == 9p.2000". I can't remember anymore why I
> > came up with this inverted form here. I'll change it to "if
> > (c->proto_version == p9_proto_legacy)".
> 
> Sounds good.
> 
> > > > +	case P9_TRENAMEAT:
> > > if we have trenameat we probably want trename, tunlinkat as well?
> > > What's your criteria for counting individually vs slapping 8k at it?
> > > 
> > > In this particular case, oldname/newname are single component names
> > > within a directory so this is capped at 2*(4+256), that could easily fit
> > > in 4k without bothering.
> > 
> > I have not taken the Linux kernel's current filename limit NAME_MAX
> > (255) as basis, in that case you would be right. Instead I looked up
> > what the maximum filename length among file systems in general was,
> > and saw that ReiserFS supports up to slightly below 4k? So I took 4k
> > as basis for the calculation used here, and the intention was to make
> > this code more future proof. Because revisiting this code later on
> > always takes quite some time and always has this certain potential to
> > miss out details.
> 
> hmm, that's pretty deeply engrained into the VFS but I guess it might
> change eventually, yes.
> 
> I don't mind as long as we're consistent (cf. unlink/mkdir below), in
> practice measuring doesn't cost much.

OK, I also make that more clear from the commit log then that 4k was taken as
basis and why.

> > Independent of the decision; additionally it might make sense to add
> > something like:
> > 
> > #if NAME_MAX > 255
> > # error p9_msg_buf_size() needs adjustments
> > #endif
> 
> That's probably an understatement but I don't mind either way, it
> doesn't hurt.
> 
> > > > +		BUG_ON(strcmp("dsds", fmt));
> > > > +		va_arg(ap, int32_t);
> > > > +		{
> > > > +			const char *oldname = va_arg(ap, const char *);
> > > > +			va_arg(ap, int32_t);
> > > > +			{
> > > > +				const char *newname = va_arg(ap, const char *);
> > > 
> > > (style nitpick) I don't see the point of nesting another level of
> > > indentation here, it feels cleaner to declare oldname/newname at the
> > > start of the block and be done with it.
> > 
> > Because  va_arg(ap, int32_t);  must remain between those two
> > declarations, and I think either the compiler or style check script
> > was barking at me. But I will recheck, if possible I will remove the
> > additional block scope here.
> 
> Yes, I think it'd need to look like this:
> 
> 	case foo:
> 		BUG_ON(...)
> 		va_arg(ap, int32_t);
> 		{
> 			const char *oldname = va_arg(ap, const char *);
> 			const char *newname;
> 			va_arg(ap, int32_t);
> 			newname = va_arg(ap, const_char *);
> 			...
> 		}
> or
> 		{
> 			const char *oldname, *newname;
> 			oldname = va_arg(ap, const char *);
> 			va_arg(ap, int32_t)
> 			newname = va_arg(ap, const char *);
> 			...
> 		}
> 
> I guess the later is slightly easier on the eyes

Ah yes, that's your win there.

> > > > +	/* small message types */
> > > 
> > > ditto: what's your criteria for 4k vs 8k?
> > 
> > As above, 4k being the basis for directory entry names, plus PATH_MAX
> > (4k) as basis for maximum path length.
> > 
> > However looking at it again, if NAME_MAX == 4k was assumed exactly,
> > then Tsymlink would have the potential to exceed 8k, as it has name[s]
> > and symtgt[s] plus the other fields.
> 
> yes.
> 
> > > > +	case P9_TSTAT:
> > > this is just fid[4], so 4k is more than enough
> > 
> > I guess that was a typo and should have been Twstat instead?
> 
> Ah, had missed this because 9p2000.L's version of stat[n] is fixed size.
> Sounds good.
> 
> > > > +	case P9_RSTAT:
> > > also fixed size 4+4+8+8+8+8+8+8+4 -- fits in 4k.
> > 
> > Rstat contains stat[n] which in turn contains variable-length string
> > fields (filename, owner name, group name)
> 
> Right, same mistake.
> 
> > > > +	case P9_TSYMLINK:
> > > that one has symlink target which can be arbitrarily long (filesystem
> > > specific, 4k is the usual limit for linux but some filesystem I don't
> > > know might handle more -- it might be worth going through the trouble of
> > > going through it.
> > 
> > Like mentioned above, if exactly NAME_MAX == 4k was assumed, then
> > Tsymlink may even be >8k.
> 
> And all the other remarks are 'yes if we assume bigger NAME_MAX' -- I'm
> happy either way.
> 
> > > rest all looks ok to me.
> > 
> > Thanks for the review! I know, that's really a dry patch to look
> > at. :)
> 
> Thanks for writing it in the first place ;)
> 
> --
> Dominique
diff mbox series

Patch

diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 3754c33e2974..49939e8cde2a 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -23,6 +23,160 @@ 
 
 #include <trace/events/9p.h>
 
+/* len[2] text[len] */
+#define P9_STRLEN(s) \
+	(2 + min_t(size_t, s ? strlen(s) : 0, USHRT_MAX))
+
+/**
+ * p9_msg_buf_size - Returns a buffer size sufficiently large to hold the
+ * intended 9p message.
+ * @c: client
+ * @type: message type
+ * @fmt: format template for assembling request message
+ * (see p9pdu_vwritef)
+ * @ap: variable arguments to be fed to passed format template
+ * (see p9pdu_vwritef)
+ *
+ * Note: Even for response types (P9_R*) the format template and variable
+ * arguments must always be for the originating request type (P9_T*).
+ */
+size_t p9_msg_buf_size(struct p9_client *c, enum p9_msg_t type,
+			const char *fmt, va_list ap)
+{
+	/* size[4] type[1] tag[2] */
+	const int hdr = 4 + 1 + 2;
+	/* ename[s] errno[4] */
+	const int rerror_size = hdr + P9_ERRMAX + 4;
+	/* ecode[4] */
+	const int rlerror_size = hdr + 4;
+	const int err_size =
+		c->proto_version == p9_proto_2000L ? rlerror_size : rerror_size;
+
+	switch (type) {
+
+	/* message types not used at all */
+	case P9_TERROR:
+	case P9_TLERROR:
+	case P9_TAUTH:
+	case P9_RAUTH:
+		BUG();
+
+	/* variable length & potentially large message types */
+	case P9_TATTACH:
+		BUG_ON(strcmp("ddss?u", fmt));
+		va_arg(ap, int32_t);
+		va_arg(ap, int32_t);
+		{
+			const char *uname = va_arg(ap, const char *);
+			const char *aname = va_arg(ap, const char *);
+			/* fid[4] afid[4] uname[s] aname[s] n_uname[4] */
+			return hdr + 4 + 4 + P9_STRLEN(uname) + P9_STRLEN(aname) + 4;
+		}
+	case P9_TWALK:
+		BUG_ON(strcmp("ddT", fmt));
+		va_arg(ap, int32_t);
+		va_arg(ap, int32_t);
+		{
+			uint i, nwname = max(va_arg(ap, int), 0);
+			size_t wname_all;
+			const char **wnames = va_arg(ap, const char **);
+			for (i = 0, wname_all = 0; i < nwname; ++i) {
+				wname_all += P9_STRLEN(wnames[i]);
+			}
+			/* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */
+			return hdr + 4 + 4 + 2 + wname_all;
+		}
+	case P9_RWALK:
+		BUG_ON(strcmp("ddT", fmt));
+		va_arg(ap, int32_t);
+		va_arg(ap, int32_t);
+		{
+			uint nwname = va_arg(ap, int);
+			/* nwqid[2] nwqid*(wqid[13]) */
+			return max_t(size_t, hdr + 2 + nwname * 13, err_size);
+		}
+	case P9_TCREATE:
+		BUG_ON(strcmp("dsdb?s", fmt));
+		va_arg(ap, int32_t);
+		{
+			const char *name = va_arg(ap, const char *);
+			if ((c->proto_version != p9_proto_2000u) &&
+			    (c->proto_version != p9_proto_2000L))
+				/* fid[4] name[s] perm[4] mode[1] */
+				return hdr + 4 + P9_STRLEN(name) + 4 + 1;
+			{
+				va_arg(ap, int32_t);
+				va_arg(ap, int);
+				{
+					const char *ext = va_arg(ap, const char *);
+					/* fid[4] name[s] perm[4] mode[1] extension[s] */
+					return hdr + 4 + P9_STRLEN(name) + 4 + 1 + P9_STRLEN(ext);
+				}
+			}
+		}
+	case P9_TLCREATE:
+		BUG_ON(strcmp("dsddg", fmt));
+		va_arg(ap, int32_t);
+		{
+			const char *name = va_arg(ap, const char *);
+			/* fid[4] name[s] flags[4] mode[4] gid[4] */
+			return hdr + 4 + P9_STRLEN(name) + 4 + 4 + 4;
+		}
+	case P9_RREAD:
+	case P9_RREADDIR:
+		BUG_ON(strcmp("dqd", fmt));
+		va_arg(ap, int32_t);
+		va_arg(ap, int64_t);
+		{
+			const int32_t count = va_arg(ap, int32_t);
+			/* count[4] data[count] */
+			return max_t(size_t, hdr + 4 + count, err_size);
+		}
+	case P9_TWRITE:
+		BUG_ON(strcmp("dqV", fmt));
+		va_arg(ap, int32_t);
+		va_arg(ap, int64_t);
+		{
+			const int32_t count = va_arg(ap, int32_t);
+			/* fid[4] offset[8] count[4] data[count] */
+			return hdr + 4 + 8 + 4 + count;
+		}
+	case P9_TRENAMEAT:
+		BUG_ON(strcmp("dsds", fmt));
+		va_arg(ap, int32_t);
+		{
+			const char *oldname = va_arg(ap, const char *);
+			va_arg(ap, int32_t);
+			{
+				const char *newname = va_arg(ap, const char *);
+				/* olddirfid[4] oldname[s] newdirfid[4] newname[s] */
+				return hdr + 4 + P9_STRLEN(oldname) + 4 + P9_STRLEN(newname);
+			}
+		}
+	case P9_RERROR:
+		return rerror_size;
+	case P9_RLERROR:
+		return rlerror_size;
+
+	/* small message types */
+	case P9_TSTAT:
+	case P9_RSTAT:
+	case P9_TSYMLINK:
+	case P9_RREADLINK:
+	case P9_TXATTRWALK:
+	case P9_TXATTRCREATE:
+	case P9_TLINK:
+	case P9_TMKDIR:
+	case P9_TUNLINKAT:
+		return 8 * 1024;
+
+	/* tiny message types */
+	default:
+		return 4 * 1024;
+
+	}
+}
+
 static int
 p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
 
diff --git a/net/9p/protocol.h b/net/9p/protocol.h
index 6d719c30331a..ad2283d1f96b 100644
--- a/net/9p/protocol.h
+++ b/net/9p/protocol.h
@@ -8,6 +8,8 @@ 
  *  Copyright (C) 2008 by IBM, Corp.
  */
 
+size_t p9_msg_buf_size(struct p9_client *c, enum p9_msg_t type,
+			const char *fmt, va_list ap);
 int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 		  va_list ap);
 int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);