[08/16] nfsd: escape high characters in binary data
diff mbox series

Message ID 1561042275-12723-9-git-send-email-bfields@redhat.com
State New
Headers show
Series
  • exposing knfsd client state to userspace
Related show

Commit Message

J. Bruce Fields June 20, 2019, 2:51 p.m. UTC
From: "J. Bruce Fields" <bfields@redhat.com>

I'm exposing some information about NFS clients in pseudofiles.  I
expect to eventually have simple tools to help read those pseudofiles.

But it's also helpful if the raw files are human-readable to the extent
possible.  It aids debugging and makes them usable on systems that don't
have the latest nfs-utils.

A minor challenge there is opaque client-generated protocol objects like
state owners and client identifiers.  Some clients generate those to
include handy information in plain ascii.  But they may also include
arbitrary byte sequences.

I think the simplest approach is to limit to isprint(c) && isascii(c)
and escape everything else.

That means you can just cat the file and get something that looks OK.
Also, I'm trying to keep these files legal YAML, which requires them to
UTF-8, and this is a simple way to guarantee that.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/seq_file.c                  | 11 +++++++++++
 include/linux/seq_file.h       |  1 +
 include/linux/string_helpers.h |  3 +++
 lib/string_helpers.c           | 19 +++++++++++++++++++
 4 files changed, 34 insertions(+)

Comments

J. Bruce Fields June 21, 2019, 5:45 p.m. UTC | #1
I'm not sure who to get review from for this kind of thing.

Kees, you seem to be one of the only people to touch string_helpers.c
at all recently, any ideas?

--b.

On Thu, Jun 20, 2019 at 10:51:07AM -0400, J. Bruce Fields wrote:
> From: "J. Bruce Fields" <bfields@redhat.com>
> 
> I'm exposing some information about NFS clients in pseudofiles.  I
> expect to eventually have simple tools to help read those pseudofiles.
> 
> But it's also helpful if the raw files are human-readable to the extent
> possible.  It aids debugging and makes them usable on systems that don't
> have the latest nfs-utils.
> 
> A minor challenge there is opaque client-generated protocol objects like
> state owners and client identifiers.  Some clients generate those to
> include handy information in plain ascii.  But they may also include
> arbitrary byte sequences.
> 
> I think the simplest approach is to limit to isprint(c) && isascii(c)
> and escape everything else.
> 
> That means you can just cat the file and get something that looks OK.
> Also, I'm trying to keep these files legal YAML, which requires them to
> UTF-8, and this is a simple way to guarantee that.
> 
> Signed-off-by: J. Bruce Fields <bfields@redhat.com>
> ---
>  fs/seq_file.c                  | 11 +++++++++++
>  include/linux/seq_file.h       |  1 +
>  include/linux/string_helpers.h |  3 +++
>  lib/string_helpers.c           | 19 +++++++++++++++++++
>  4 files changed, 34 insertions(+)
> 
> diff --git a/fs/seq_file.c b/fs/seq_file.c
> index abe27ec43176..04f09689cd6d 100644
> --- a/fs/seq_file.c
> +++ b/fs/seq_file.c
> @@ -384,6 +384,17 @@ void seq_escape(struct seq_file *m, const char *s, const char *esc)
>  }
>  EXPORT_SYMBOL(seq_escape);
>  
> +void seq_escape_mem_ascii(struct seq_file *m, const char *src, size_t isz)
> +{
> +	char *buf;
> +	size_t size = seq_get_buf(m, &buf);
> +	int ret;
> +
> +	ret = string_escape_mem_ascii(src, isz, buf, size);
> +	seq_commit(m, ret < size ? ret : -1);
> +}
> +EXPORT_SYMBOL(seq_escape_mem_ascii);
> +
>  void seq_vprintf(struct seq_file *m, const char *f, va_list args)
>  {
>  	int len;
> diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
> index a121982af0f5..5998e1f4ff06 100644
> --- a/include/linux/seq_file.h
> +++ b/include/linux/seq_file.h
> @@ -127,6 +127,7 @@ void seq_put_hex_ll(struct seq_file *m, const char *delimiter,
>  		    unsigned long long v, unsigned int width);
>  
>  void seq_escape(struct seq_file *m, const char *s, const char *esc);
> +void seq_escape_mem_ascii(struct seq_file *m, const char *src, size_t isz);
>  
>  void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type,
>  		  int rowsize, int groupsize, const void *buf, size_t len,
> diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
> index d23c5030901a..c28955132234 100644
> --- a/include/linux/string_helpers.h
> +++ b/include/linux/string_helpers.h
> @@ -54,6 +54,9 @@ static inline int string_unescape_any_inplace(char *buf)
>  int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
>  		unsigned int flags, const char *only);
>  
> +int string_escape_mem_ascii(const char *src, size_t isz, char *dst,
> +					size_t osz);
> +
>  static inline int string_escape_mem_any_np(const char *src, size_t isz,
>  		char *dst, size_t osz, const char *only)
>  {
> diff --git a/lib/string_helpers.c b/lib/string_helpers.c
> index 29c490e5d478..9ca19918ca26 100644
> --- a/lib/string_helpers.c
> +++ b/lib/string_helpers.c
> @@ -539,6 +539,25 @@ int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
>  }
>  EXPORT_SYMBOL(string_escape_mem);
>  
> +int string_escape_mem_ascii(const char *src, size_t isz, char *dst,
> +					size_t osz)
> +{
> +	char *p = dst;
> +	char *end = p + osz;
> +
> +	while (isz--) {
> +		unsigned char c = *src++;
> +
> +		if (!isprint(c) || !isascii(c) || c == '"' || c == '\\')
> +			escape_hex(c, &p, end);
> +		else
> +			escape_passthrough(c, &p, end);
> +	}
> +
> +	return p - dst;
> +}
> +EXPORT_SYMBOL(string_escape_mem_ascii);
> +
>  /*
>   * Return an allocated string that has been escaped of special characters
>   * and double quotes, making it safe to log in quotes.
> -- 
> 2.21.0
Kees Cook June 21, 2019, 10:26 p.m. UTC | #2
On Fri, Jun 21, 2019 at 01:45:44PM -0400, J. Bruce Fields wrote:
> I'm not sure who to get review from for this kind of thing.
> 
> Kees, you seem to be one of the only people to touch string_helpers.c
> at all recently, any ideas?

Hi! Yeah, I'm happy to take a look. Notes below...

> 
> --b.
> 
> On Thu, Jun 20, 2019 at 10:51:07AM -0400, J. Bruce Fields wrote:
> > From: "J. Bruce Fields" <bfields@redhat.com>
> > 
> > I'm exposing some information about NFS clients in pseudofiles.  I
> > expect to eventually have simple tools to help read those pseudofiles.
> > 
> > But it's also helpful if the raw files are human-readable to the extent
> > possible.  It aids debugging and makes them usable on systems that don't
> > have the latest nfs-utils.
> > 
> > A minor challenge there is opaque client-generated protocol objects like
> > state owners and client identifiers.  Some clients generate those to
> > include handy information in plain ascii.  But they may also include
> > arbitrary byte sequences.
> > 
> > I think the simplest approach is to limit to isprint(c) && isascii(c)
> > and escape everything else.

Can you get the same functionality out of sprintf's %pE (escaped
string)? If not, maybe we should expand the flags available?

 * - 'E[achnops]' For an escaped buffer, where rules are defined by
 * combination
 *                of the following flags (see string_escape_mem() for
 *                the
 *                details):
 *                  a - ESCAPE_ANY
 *                  c - ESCAPE_SPECIAL
 *                  h - ESCAPE_HEX
 *                  n - ESCAPE_NULL
 *                  o - ESCAPE_OCTAL
 *                  p - ESCAPE_NP
 *                  s - ESCAPE_SPACE
 *                By default ESCAPE_ANY_NP is used.

This doesn't cover escaping >0x7f and " and \

And perhaps I should rework kstrdup_quotable() to have that flag? It's
not currently escaping non-ascii and it probably should. Maybe
"ESCAPE_QUOTABLE" as "q"?
J. Bruce Fields June 22, 2019, 7 p.m. UTC | #3
On Fri, Jun 21, 2019 at 03:26:00PM -0700, Kees Cook wrote:
> On Fri, Jun 21, 2019 at 01:45:44PM -0400, J. Bruce Fields wrote:
> > I'm not sure who to get review from for this kind of thing.
> > 
> > Kees, you seem to be one of the only people to touch string_helpers.c
> > at all recently, any ideas?
> 
> Hi! Yeah, I'm happy to take a look. Notes below...

Thanks!

> > On Thu, Jun 20, 2019 at 10:51:07AM -0400, J. Bruce Fields wrote:
> > > From: "J. Bruce Fields" <bfields@redhat.com>
> > > 
> > > I'm exposing some information about NFS clients in pseudofiles.  I
> > > expect to eventually have simple tools to help read those pseudofiles.
> > > 
> > > But it's also helpful if the raw files are human-readable to the extent
> > > possible.  It aids debugging and makes them usable on systems that don't
> > > have the latest nfs-utils.
> > > 
> > > A minor challenge there is opaque client-generated protocol objects like
> > > state owners and client identifiers.  Some clients generate those to
> > > include handy information in plain ascii.  But they may also include
> > > arbitrary byte sequences.
> > > 
> > > I think the simplest approach is to limit to isprint(c) && isascii(c)
> > > and escape everything else.
> 
> Can you get the same functionality out of sprintf's %pE (escaped
> string)? If not, maybe we should expand the flags available?

Nothing against it, I just didn't want it to do that for one user,
but...

> 
>  * - 'E[achnops]' For an escaped buffer, where rules are defined by
>  * combination
>  *                of the following flags (see string_escape_mem() for
>  *                the
>  *                details):
>  *                  a - ESCAPE_ANY
>  *                  c - ESCAPE_SPECIAL
>  *                  h - ESCAPE_HEX
>  *                  n - ESCAPE_NULL
>  *                  o - ESCAPE_OCTAL
>  *                  p - ESCAPE_NP
>  *                  s - ESCAPE_SPACE
>  *                By default ESCAPE_ANY_NP is used.
> 
> This doesn't cover escaping >0x7f and " and \
> 
> And perhaps I should rework kstrdup_quotable() to have that flag? It's
> not currently escaping non-ascii and it probably should. Maybe
> "ESCAPE_QUOTABLE" as "q"?

... but if you think there's a lot of existing users that really want
this behavior, then great.

I'll look into that.

The logic around ESCAPE_NP and the "only" string is really confusing.  I
started assuming I could just add an ESCAPE_NONASCII flag and stick "
and \ into the "only" string, but it doesn't work that way.

---b.
Kees Cook June 22, 2019, 8:22 p.m. UTC | #4
On Sat, Jun 22, 2019 at 03:00:58PM -0400, J. Bruce Fields wrote:
> The logic around ESCAPE_NP and the "only" string is really confusing.  I
> started assuming I could just add an ESCAPE_NONASCII flag and stick "
> and \ into the "only" string, but it doesn't work that way.

Yeah, if ESCAPE_NP isn't specified, the "only" characters are passed
through. It'd be nice to have an "add" or a clearer way to do actual
ctype subsets, etc. If there isn't an obviously clear way to refactor
it, just skip it for now and I'm happy to ack your original patch. :)
J. Bruce Fields June 24, 2019, 9:05 p.m. UTC | #5
On Sat, Jun 22, 2019 at 01:22:56PM -0700, Kees Cook wrote:
> On Sat, Jun 22, 2019 at 03:00:58PM -0400, J. Bruce Fields wrote:
> > The logic around ESCAPE_NP and the "only" string is really confusing.  I
> > started assuming I could just add an ESCAPE_NONASCII flag and stick "
> > and \ into the "only" string, but it doesn't work that way.
> 
> Yeah, if ESCAPE_NP isn't specified, the "only" characters are passed
> through. It'd be nice to have an "add" or a clearer way to do actual
> ctype subsets, etc. If there isn't an obviously clear way to refactor
> it, just skip it for now and I'm happy to ack your original patch. :)

There may well be some simplification possible here....  There aren't
really many users of "only", for example.  I'll look into it some more.

--b.
J. Bruce Fields June 26, 2019, 4:21 p.m. UTC | #6
On Mon, Jun 24, 2019 at 05:05:12PM -0400, J. Bruce Fields wrote:
> On Sat, Jun 22, 2019 at 01:22:56PM -0700, Kees Cook wrote:
> > On Sat, Jun 22, 2019 at 03:00:58PM -0400, J. Bruce Fields wrote:
> > > The logic around ESCAPE_NP and the "only" string is really confusing.  I
> > > started assuming I could just add an ESCAPE_NONASCII flag and stick "
> > > and \ into the "only" string, but it doesn't work that way.
> > 
> > Yeah, if ESCAPE_NP isn't specified, the "only" characters are passed
> > through. It'd be nice to have an "add" or a clearer way to do actual
> > ctype subsets, etc. If there isn't an obviously clear way to refactor
> > it, just skip it for now and I'm happy to ack your original patch. :)
> 
> There may well be some simplification possible here....  There aren't
> really many users of "only", for example.  I'll look into it some more.

The printk users are kind of mysterious to me.  I did a grep for

	git grep '%[0-9.*]pE'

which got 75 hits.  All of them for pE.  I couldn't find any of the
other pE[achnops] variants.  pE is equivalent to ESCAPE_ANY|ESCAPE_NP.
Confusingly, ESCAPE_NP doesn't mean "escape non-printable", it means
"don't escape printable".  So things like carriage returns aren't
escaped.

Of those 57 were in drivers/net/wireless, and from a quick check seemed
mostly to be for SSIDs in debug messages.  I *think* SSIDs can be
arbitrary bytes?  If they really want them escaped then I suspect they
want more than just nonprintable characters escaped.

One of the hits outside wireless code was in drm_dp_cec_adap_status,
which was printing some device ID into a debugfs file with "ID: %*pE\n".
If the ID actually needs escaping, then I suspect the meant to escape \n
too to prevent misparsing that output.

--b.
Kees Cook June 27, 2019, 4:16 a.m. UTC | #7
On Wed, Jun 26, 2019 at 12:21:49PM -0400, J. Bruce Fields wrote:
> On Mon, Jun 24, 2019 at 05:05:12PM -0400, J. Bruce Fields wrote:
> > On Sat, Jun 22, 2019 at 01:22:56PM -0700, Kees Cook wrote:
> > > On Sat, Jun 22, 2019 at 03:00:58PM -0400, J. Bruce Fields wrote:
> > > > The logic around ESCAPE_NP and the "only" string is really confusing.  I
> > > > started assuming I could just add an ESCAPE_NONASCII flag and stick "
> > > > and \ into the "only" string, but it doesn't work that way.
> > > 
> > > Yeah, if ESCAPE_NP isn't specified, the "only" characters are passed
> > > through. It'd be nice to have an "add" or a clearer way to do actual
> > > ctype subsets, etc. If there isn't an obviously clear way to refactor
> > > it, just skip it for now and I'm happy to ack your original patch. :)
> > 
> > There may well be some simplification possible here....  There aren't
> > really many users of "only", for example.  I'll look into it some more.
> 
> The printk users are kind of mysterious to me.  I did a grep for
> 
> 	git grep '%[0-9.*]pE'
> 
> which got 75 hits.  All of them for pE.  I couldn't find any of the
> other pE[achnops] variants.  pE is equivalent to ESCAPE_ANY|ESCAPE_NP.

I saw pEn and pEhp and pEp:

drivers/staging/rtl8192e/rtllib.h:      snprintf(escaped, sizeof(escaped), "%*pEn", essid_len, essid);
drivers/staging/rtl8192u/ieee80211/ieee80211.h: snprintf(escaped, sizeof(escaped), "%*pEn", essid_len, essid);
drivers/staging/wlan-ng/prism2sta.c: netdev_info(wlandev->netdev, "Prism2 card SN: %*pEhp\n",
drivers/thunderbolt/xdomain.c:  return sprintf(buf, "%*pEp\n", (int)strlen(svc->key), svc->key);

However, every use was insufficient, AFAICT.

This:
	git grep -2 '\bescape_essid\b'
Shows that all the staging uses end up getting logged as: '%s' so their
escaping is insufficient.

> Confusingly, ESCAPE_NP doesn't mean "escape non-printable", it means
> "don't escape printable".  So things like carriage returns aren't
> escaped.

Right -- any they're almost all logged surrounded by ' or " which means
those would need to be escaped as well. The prism2 is leaking newlines
too, as well as the thunderbolt sysfs printing.

So... seems like we should fix this. :P

> Of those 57 were in drivers/net/wireless, and from a quick check seemed
> mostly to be for SSIDs in debug messages.  I *think* SSIDs can be
> arbitrary bytes?  If they really want them escaped then I suspect they
> want more than just nonprintable characters escaped.
> 
> One of the hits outside wireless code was in drm_dp_cec_adap_status,
> which was printing some device ID into a debugfs file with "ID: %*pE\n".
> If the ID actually needs escaping, then I suspect the meant to escape \n
> too to prevent misparsing that output.

I think we need to make the default produce "loggable" output.
non-ascii, non-printables, \, ', and " need to be escaped. Maybe " "
too?
J. Bruce Fields June 27, 2019, 3:23 p.m. UTC | #8
On Wed, Jun 26, 2019 at 09:16:44PM -0700, Kees Cook wrote:
> Right -- any they're almost all logged surrounded by ' or " which means
> those would need to be escaped as well. The prism2 is leaking newlines
> too, as well as the thunderbolt sysfs printing.
> 
> So... seems like we should fix this. :P
...
> I think we need to make the default produce "loggable" output.
> non-ascii, non-printables, \, ', and " need to be escaped. Maybe " "
> too?

OK, so I think the first step is to take a closer look at the users of
the default %*pE.  If there are any that look like they'd be broken by a
change, we should make patches moving to something else, then we can
change the default.

Then we can also replace ESCAPE_ANY and ESCAPE_NP--that "don't escape
printable" logic is confusing and makes it hard to add more types of
escaping.  And it appears to only be used by %*pE.

--b
J. Bruce Fields June 27, 2019, 8:21 p.m. UTC | #9
On Wed, Jun 26, 2019 at 12:21:49PM -0400, J. Bruce Fields wrote:
> On Mon, Jun 24, 2019 at 05:05:12PM -0400, J. Bruce Fields wrote:
> > On Sat, Jun 22, 2019 at 01:22:56PM -0700, Kees Cook wrote:
> > > On Sat, Jun 22, 2019 at 03:00:58PM -0400, J. Bruce Fields wrote:
> > > > The logic around ESCAPE_NP and the "only" string is really confusing.  I
> > > > started assuming I could just add an ESCAPE_NONASCII flag and stick "
> > > > and \ into the "only" string, but it doesn't work that way.
> > > 
> > > Yeah, if ESCAPE_NP isn't specified, the "only" characters are passed
> > > through. It'd be nice to have an "add" or a clearer way to do actual
> > > ctype subsets, etc. If there isn't an obviously clear way to refactor
> > > it, just skip it for now and I'm happy to ack your original patch. :)
> > 
> > There may well be some simplification possible here....  There aren't
> > really many users of "only", for example.  I'll look into it some more.
> 
> The printk users are kind of mysterious to me.  I did a grep for
> 
> 	git grep '%[0-9.*]pE'
> 
> which got 75 hits.  All of them for pE.  I couldn't find any of the
> other pE[achnops] variants.  pE is equivalent to ESCAPE_ANY|ESCAPE_NP.
> Confusingly, ESCAPE_NP doesn't mean "escape non-printable", it means
> "don't escape printable".  So things like carriage returns aren't
> escaped.

No, I was confused: "\n" is non-printable according to isprint(), so
ESCAPE_ANY_NP *will* escape it.  So this isn't quite so bad.  SSIDs are
usually printed as '%*pE', so arguably we should be escaping the single
quote character too, but at least we're not allowing line breaks
through.  I don't know about non-ascii.

> One of the hits outside wireless code was in drm_dp_cec_adap_status,
> which was printing some device ID into a debugfs file with "ID: %*pE\n".
> If the ID actually needs escaping, then I suspect the meant to escape \n
> too to prevent misparsing that output.

And same here, this is OK.

--b.
Kees Cook June 28, 2019, 3:58 a.m. UTC | #10
On Thu, Jun 27, 2019 at 04:21:24PM -0400, J. Bruce Fields wrote:
> No, I was confused: "\n" is non-printable according to isprint(), so
> ESCAPE_ANY_NP *will* escape it.  So this isn't quite so bad.  SSIDs are
> usually printed as '%*pE', so arguably we should be escaping the single
> quote character too, but at least we're not allowing line breaks
> through.  I don't know about non-ascii.

Okay, cool. Given that most things are just trying to log, it seems like
it should be safe to have %pE escape non-ascii, non-printable, \, ', and "?

And if we changing that, we're likely changing
string_escape_mem(). Looking at callers of string_escape_mem() makes my
head spin...

Anyway, I don't want to block you needlessly. What would like to have
be next steps here?
J. Bruce Fields June 28, 2019, 4:33 p.m. UTC | #11
On Thu, Jun 27, 2019 at 08:58:22PM -0700, Kees Cook wrote:
> On Thu, Jun 27, 2019 at 04:21:24PM -0400, J. Bruce Fields wrote:
> > No, I was confused: "\n" is non-printable according to isprint(), so
> > ESCAPE_ANY_NP *will* escape it.  So this isn't quite so bad.  SSIDs are
> > usually printed as '%*pE', so arguably we should be escaping the single
> > quote character too, but at least we're not allowing line breaks
> > through.  I don't know about non-ascii.
> 
> Okay, cool. Given that most things are just trying to log, it seems like
> it should be safe to have %pE escape non-ascii, non-printable, \, ', and "?
> 
> And if we changing that, we're likely changing
> string_escape_mem(). Looking at callers of string_escape_mem() makes my
> head spin...

kstrdup_quotable:
	- only a few callers, mostly just logging, but
	  msm_gpu_crashstate_capture uses it to generate some data that
	  looks like it goes in a crashdump.  Dunno if there might be
	  some utility depending on the current escaping. On the other
	  hand, kstrdup_quotable uses ESCAPE_HEX, "\f\n\r\t\v\a\e\\\""
	  so those characters are all escaped as \xNN, so I'd hope
	  any parser would be prepared to unescape any hex character,
	  they'd have to go out of their way to do anything else.
string_escape_str:
	- proc_task_name: ESCAPE_SPACE|ESCAPE_SPECIAL, "\n\\", used for
	  command name in /proc/<pid>/stat{us}.  No way do I want to
	  change the format of those files at all.
	- seq_escape: ESCAPE_OCTAL, esc: haven't surveyed callers
	  carefully, but this probably shouldn't be changed.
	- qword_add: ESCAPE_OCTAL, "\\ \n\t", some nfsd upcalls.  Fine
	  as they are, but the other side will happily accept any octal
	  or hex escaping.
string_escape_mem_any_np, string_escape_str_any_np:
	- totally unused.
escaped_string: this is the vsnprintf logic.  Tons of users, haven't had
	a chance to look at them all.  Almost all %*pE, the exceptions
	don't look important.

So the only flag values we care about are ESCAPE_HEX, ESCAPE_OCTAL,
ESCAPE_SPACE|ESCAPE_SPECIAL, and ESCAPE_ANY_NP.

So this could be cleaned up some if we wanted.

> Anyway, I don't want to block you needlessly. What would like to have
> be next steps here?

I might still be interested in some cleanup, I find the current logic
unnecessarily confusing.

But I may just give up and go with my existing patch and put
off that project indefinitely, especially if there's no real need to fix
the existing callers.

I don't know....

--b.
J. Bruce Fields July 10, 2019, 10:09 p.m. UTC | #12
On Fri, Jun 28, 2019 at 12:33:58PM -0400, J. Bruce Fields wrote:
> But I may just give up and go with my existing patch and put
> off that project indefinitely, especially if there's no real need to fix
> the existing callers.

I went with the existing patch, but gave a little more thought to
string_escape_mem.  Stuff that bugs me:

	- ESCAPE_NP sounds like it means "escape nonprinting
	  characters", but actually means "do not escape printing
	  characters"
	- the use of the "only" string to limit the list of escaped
	  characters rather than supplement them is confusing and kind
	  of unhelpful.
	- most of the flags are actually totally unused
    
So what I'd like to do is:
    
	- eliminate unused flags
	- use the "only" string to add to, rather than replace, the list
	  of characters to escape
	- separate flags into those that select which characters to
	  escape, and those that choose the format of the escaping ("\ "
	  vs "\x20" vs "\040".)
    
I've got some patches that do all that and I think it works.  I need to
clean them up a bit and fix up the tests.

--b.
Kees Cook July 11, 2019, 1:54 a.m. UTC | #13
On Wed, Jul 10, 2019 at 06:09:31PM -0400, J. Bruce Fields wrote:
> On Fri, Jun 28, 2019 at 12:33:58PM -0400, J. Bruce Fields wrote:
> > But I may just give up and go with my existing patch and put
> > off that project indefinitely, especially if there's no real need to fix
> > the existing callers.
> 
> I went with the existing patch, but gave a little more thought to
> string_escape_mem.  Stuff that bugs me:
> 
> 	- ESCAPE_NP sounds like it means "escape nonprinting
> 	  characters", but actually means "do not escape printing
> 	  characters"
> 	- the use of the "only" string to limit the list of escaped
> 	  characters rather than supplement them is confusing and kind
> 	  of unhelpful.
> 	- most of the flags are actually totally unused
>     
> So what I'd like to do is:
>     
> 	- eliminate unused flags
> 	- use the "only" string to add to, rather than replace, the list
> 	  of characters to escape
> 	- separate flags into those that select which characters to
> 	  escape, and those that choose the format of the escaping ("\ "
> 	  vs "\x20" vs "\040".)
>     
> I've got some patches that do all that and I think it works.  I need to
> clean them up a bit and fix up the tests.

This sounds amazing; thanks! Luckily there are self-tests for this code,
so anything really surprising should stand out. I'm looking forward to
it -- I want to see if I can refactor a few of the callers (if you
haven't already do so) too.

Yay!
Shevchenko, Andriy Aug. 6, 2019, 12:19 p.m. UTC | #14
On Thu, Jun 20, 2019 at 10:51:07AM -0400, J. Bruce Fields wrote:
> From: "J. Bruce Fields" <bfields@redhat.com>
> 
> I'm exposing some information about NFS clients in pseudofiles.  I
> expect to eventually have simple tools to help read those pseudofiles.
> 
> But it's also helpful if the raw files are human-readable to the extent
> possible.  It aids debugging and makes them usable on systems that don't
> have the latest nfs-utils.
> 
> A minor challenge there is opaque client-generated protocol objects like
> state owners and client identifiers.  Some clients generate those to
> include handy information in plain ascii.  But they may also include
> arbitrary byte sequences.
> 
> I think the simplest approach is to limit to isprint(c) && isascii(c)
> and escape everything else.
> 
> That means you can just cat the file and get something that looks OK.
> Also, I'm trying to keep these files legal YAML, which requires them to
> UTF-8, and this is a simple way to guarantee that.

Two questions:
- why can't be original function extended to cover this case
  (using additional flags, maybe)?
- where are the test cases?
J. Bruce Fields Aug. 6, 2019, 6:50 p.m. UTC | #15
On Tue, Aug 06, 2019 at 03:19:31PM +0300, Andy Shevchenko wrote:
> On Thu, Jun 20, 2019 at 10:51:07AM -0400, J. Bruce Fields wrote:
> > From: "J. Bruce Fields" <bfields@redhat.com>
> > 
> > I'm exposing some information about NFS clients in pseudofiles.  I
> > expect to eventually have simple tools to help read those pseudofiles.
> > 
> > But it's also helpful if the raw files are human-readable to the extent
> > possible.  It aids debugging and makes them usable on systems that don't
> > have the latest nfs-utils.
> > 
> > A minor challenge there is opaque client-generated protocol objects like
> > state owners and client identifiers.  Some clients generate those to
> > include handy information in plain ascii.  But they may also include
> > arbitrary byte sequences.
> > 
> > I think the simplest approach is to limit to isprint(c) && isascii(c)
> > and escape everything else.
> > 
> > That means you can just cat the file and get something that looks OK.
> > Also, I'm trying to keep these files legal YAML, which requires them to
> > UTF-8, and this is a simple way to guarantee that.
> 
> Two questions:
> - why can't be original function extended to cover this case
>   (using additional flags, maybe)?

I found the ESCAPE_NP/"only" logic made it a little difficult to extend
string_escape_mem().

So, I wrote a patch series that removes the string_escape_mem flags that
aren't used, simplifies it a bit, then separates the flags into two
different types: those that select which characters to escape
(non-printable, non-ascii, whitespace, etc.) and those that choose a
style of escaping to use (octal, hex, or \\).  That seems to make the
code a little easier to extend while still covering the cases people
actually use.  I'll try to get those out this week and you can tell me
what you think.

> - where are the test cases?

I didn't write a test case.  I agree that it would be a good idea--I'll
work on it.

--b.
Shevchenko, Andriy Aug. 7, 2019, 9 a.m. UTC | #16
On Tue, Aug 06, 2019 at 02:50:08PM -0400, J. Bruce Fields wrote:
> On Tue, Aug 06, 2019 at 03:19:31PM +0300, Andy Shevchenko wrote:
> > On Thu, Jun 20, 2019 at 10:51:07AM -0400, J. Bruce Fields wrote:
> > > From: "J. Bruce Fields" <bfields@redhat.com>
> > > 
> > > I'm exposing some information about NFS clients in pseudofiles.  I
> > > expect to eventually have simple tools to help read those pseudofiles.
> > > 
> > > But it's also helpful if the raw files are human-readable to the extent
> > > possible.  It aids debugging and makes them usable on systems that don't
> > > have the latest nfs-utils.
> > > 
> > > A minor challenge there is opaque client-generated protocol objects like
> > > state owners and client identifiers.  Some clients generate those to
> > > include handy information in plain ascii.  But they may also include
> > > arbitrary byte sequences.
> > > 
> > > I think the simplest approach is to limit to isprint(c) && isascii(c)
> > > and escape everything else.
> > > 
> > > That means you can just cat the file and get something that looks OK.
> > > Also, I'm trying to keep these files legal YAML, which requires them to
> > > UTF-8, and this is a simple way to guarantee that.
> > 
> > Two questions:
> > - why can't be original function extended to cover this case
> >   (using additional flags, maybe)?
> 
> I found the ESCAPE_NP/"only" logic made it a little difficult to extend
> string_escape_mem().

Maybe it requires more thinking about?
I think it is still possible to extend existing, rather to take workarounds
like this one.

> So, I wrote a patch series that removes the string_escape_mem flags that
> aren't used

Have you considered the potential users that can be converted to use
string_escape_mem()?

I know about at least one (needs to be reworked a bit, but it is in slow
progress).

There are potentially others that would be converted using "unused" flags.

>, simplifies it a bit, then separates the flags into two
> different types: those that select which characters to escape
> (non-printable, non-ascii, whitespace, etc.) and those that choose a
> style of escaping to use (octal, hex, or \\).  That seems to make the
> code a little easier to extend while still covering the cases people
> actually use.  I'll try to get those out this week and you can tell me
> what you think.

Will be glad to help!

In any case regarding to this one, I would like rather to see it's never
appeared, or now will be gone in favour of string_escape_mem() extension.
J. Bruce Fields Aug. 8, 2019, 11:28 a.m. UTC | #17
On Wed, Aug 07, 2019 at 12:00:07PM +0300, Andy Shevchenko wrote:
> Maybe it requires more thinking about?
> I think it is still possible to extend existing, rather to take workarounds
> like this one.

Yeah, agreed.

> > So, I wrote a patch series that removes the string_escape_mem flags that
> > aren't used
> 
> Have you considered the potential users that can be converted to use
> string_escape_mem()?
> 
> I know about at least one (needs to be reworked a bit, but it is in slow
> progress).
> 
> There are potentially others that would be converted using "unused" flags.

OK, that'd be interesting to know about.

> 
> >, simplifies it a bit, then separates the flags into two
> > different types: those that select which characters to escape
> > (non-printable, non-ascii, whitespace, etc.) and those that choose a
> > style of escaping to use (octal, hex, or \\).  That seems to make the
> > code a little easier to extend while still covering the cases people
> > actually use.  I'll try to get those out this week and you can tell me
> > what you think.
> 
> Will be glad to help!
> 
> In any case regarding to this one, I would like rather to see it's never
> appeared, or now will be gone in favour of string_escape_mem() extension.

To be clear, it's already merged.  Apologies, I actually saw your name
when looking for people to cc, but the last commit was 5 years ago and I
assumed you'd moved on.  The project to extend string_escape_mem()
looked more complicated than I first expected so I decided to merge this
first and then follow up with my attempt at that.

--b.
Shevchenko, Andriy Aug. 27, 2019, 1:36 p.m. UTC | #18
On Thu, Aug 08, 2019 at 07:28:44AM -0400, J. Bruce Fields wrote:
> On Wed, Aug 07, 2019 at 12:00:07PM +0300, Andy Shevchenko wrote:

> > In any case regarding to this one, I would like rather to see it's never
> > appeared, or now will be gone in favour of string_escape_mem() extension.
> 
> To be clear, it's already merged.  Apologies, I actually saw your name
> when looking for people to cc, but the last commit was 5 years ago and I
> assumed you'd moved on.  The project to extend string_escape_mem()
> looked more complicated than I first expected so I decided to merge this
> first and then follow up with my attempt at that.

The (main) problem with the new helper is that is missed from %pE point of
view. That's why I really prefer to see new flags rather than new helpers like
that.

Patch
diff mbox series

diff --git a/fs/seq_file.c b/fs/seq_file.c
index abe27ec43176..04f09689cd6d 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -384,6 +384,17 @@  void seq_escape(struct seq_file *m, const char *s, const char *esc)
 }
 EXPORT_SYMBOL(seq_escape);
 
+void seq_escape_mem_ascii(struct seq_file *m, const char *src, size_t isz)
+{
+	char *buf;
+	size_t size = seq_get_buf(m, &buf);
+	int ret;
+
+	ret = string_escape_mem_ascii(src, isz, buf, size);
+	seq_commit(m, ret < size ? ret : -1);
+}
+EXPORT_SYMBOL(seq_escape_mem_ascii);
+
 void seq_vprintf(struct seq_file *m, const char *f, va_list args)
 {
 	int len;
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index a121982af0f5..5998e1f4ff06 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -127,6 +127,7 @@  void seq_put_hex_ll(struct seq_file *m, const char *delimiter,
 		    unsigned long long v, unsigned int width);
 
 void seq_escape(struct seq_file *m, const char *s, const char *esc);
+void seq_escape_mem_ascii(struct seq_file *m, const char *src, size_t isz);
 
 void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type,
 		  int rowsize, int groupsize, const void *buf, size_t len,
diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
index d23c5030901a..c28955132234 100644
--- a/include/linux/string_helpers.h
+++ b/include/linux/string_helpers.h
@@ -54,6 +54,9 @@  static inline int string_unescape_any_inplace(char *buf)
 int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
 		unsigned int flags, const char *only);
 
+int string_escape_mem_ascii(const char *src, size_t isz, char *dst,
+					size_t osz);
+
 static inline int string_escape_mem_any_np(const char *src, size_t isz,
 		char *dst, size_t osz, const char *only)
 {
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index 29c490e5d478..9ca19918ca26 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -539,6 +539,25 @@  int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
 }
 EXPORT_SYMBOL(string_escape_mem);
 
+int string_escape_mem_ascii(const char *src, size_t isz, char *dst,
+					size_t osz)
+{
+	char *p = dst;
+	char *end = p + osz;
+
+	while (isz--) {
+		unsigned char c = *src++;
+
+		if (!isprint(c) || !isascii(c) || c == '"' || c == '\\')
+			escape_hex(c, &p, end);
+		else
+			escape_passthrough(c, &p, end);
+	}
+
+	return p - dst;
+}
+EXPORT_SYMBOL(string_escape_mem_ascii);
+
 /*
  * Return an allocated string that has been escaped of special characters
  * and double quotes, making it safe to log in quotes.