[linux-cifs-client] cifs: add helper to simplify unicode to NLS conversion and use it (try #2)
diff mbox

Message ID 49E30934.9000709@suse.de
State New, archived
Headers show

Commit Message

Suresh Jayaraman April 13, 2009, 9:43 a.m. UTC
Thanks for the excellent review comments. Have incorporated all of them
except the concern that whether "src" is null terminated or not. I think
it is not required as cifs_strfromUCS_le() takes care of the boundary
check. Please review the updated patch below.

Add helper to simplify unicode(UCS/UTF-16) to NLS conversion. The helper
function calculates the memory needed exactly instead of using size
assumptions and consolidates some common code in there.

Signed-off-by: Suresh Jayaraman <sjayaraman@suse.de>
---

 fs/cifs/cifs_unicode.h |   28 ++++++++++++++++++++
 fs/cifs/cifsproto.h    |    2 +
 fs/cifs/cifssmb.c      |   31 ++++++++++++++++++++++
 fs/cifs/connect.c      |   67 +++++++++++++++++++----------------------------
 fs/cifs/sess.c         |   46 +++++++++++++++-----------------
 5 files changed, 110 insertions(+), 64 deletions(-)

Comments

Peter Hudec April 13, 2009, 11:06 a.m. UTC | #1
Here are my suggestions. Please note that it is not tested, not even 
compiled!

When assuming that const wchar_t *str is a fixed width UCS-2 string, we 
can just divide the number of input bytes with 2 and

get the number of chars.
Additionally, I replaced maxlen with maxbytes, as I think we want to 
limit the number of bytes and not the number of chars.
I cleaned up not needed stack variables, such as uni.

This function calculates a native 0-termination, which should be fail-safe.

Example:
AAA\0
In UCS-2-LE:
41 00 41 00 00 00
            *****
Converted to UTF-8, ASCII or ISO 8859-1:
41 41 41 00
         **
'Converted' to UTF-16:
41 00 41 00 00 00
            *****

/*
 * UniStrnlenBytes: Return the length of a NLS string in bytes including 
0-termination.
 * Also, populates 'nchars' with the number of chars.
 */
static inline size_t
UniStrnlenBytes(const wchar_t *str, const unsigned int maxbytes, 
unsigned int *nchars,
        const struct nls_table *codepage)
{
    int nc;
    size_t nbytes = 0;
    char buf[NLS_MAX_CHARSET_SIZE]; /* enough for one char at a time */

    if (maxbytes == 0)
        return -EINVAL;

    while (1) {
        nc = codepage->uni2char(*str, buf, NLS_MAX_CHARSET_SIZE);
        if (nc > 0)
            nbytes += nc;
        else
            nbytes += 1; /* for '?' */
        if (nbytes >= maxbytes)
            break; /* byte limit reached */
        if (!*str)
            break; /* 0-termination processed */
        str++; /* next char */
    }
    *nchars = nbytes / 2; /* fixed width UCS-2: 16 bit per character */

    return nbytes;
}


/*
 * Calculates, allocates memory and converts src (UCS-2) to a NLS string.
 * Note: caller is responsible for freeing dst if function returned 0.
 * returns:
 *     on success - 0
 *     on failure - errno
 */
int
cifs_ucs_to_nls(char **dst, const char *src, const unsigned int 
maxbytes, int *plen,
               const struct nls_table *nls_codepage)
{
    size_t nbytes;
    int outlen;

    nbytes = UniStrnlenBytes((wchar_t *)src, maxbytes, plen, nls_codepage);

    if (nbytes <= 0)
        goto err_exit;

    *dst = kzalloc(nbytes, GFP_KERNEL); /* nbytes includes 0-termination */
    if (!*dst)
        goto err_exit;

    outlen = cifs_strfromUCS_le(*dst, (__le16 *)src, *plen, nls_codepage);

    /* Add 0-termination */
    /* nbytes - outlen should be sufficient, but you never know */
    nls_codepage->uni2char(0, *dst[outlen], nbytes - outlen);

    return 0;

err_exit:
    cERROR(1, ("Failed to allocate buffer for string\n"));
    return -ENOMEM;
}


I found an alternative wctomb function in sh-utils.
For me it looks more efficient. The question is if it should be cut to 
handle only 4 byte sequences.

Original from sh-utils-2.0.14/lib/unicodeio.c:
/* Stores the UTF-8 representation of the Unicode character wc in r[0..5].
   Returns the number of bytes stored, or -1 if wc is out of range.  */
static int
utf8_wctomb (unsigned char *r, unsigned int wc)
{
  int count;

  if (wc < 0x80)
    count = 1;
  else if (wc < 0x800)
    count = 2;
  else if (wc < 0x10000)
    count = 3;
  else if (wc < 0x200000)
    count = 4;
  else if (wc < 0x4000000)
    count = 5;
  else if (wc <= 0x7fffffff)
    count = 6;
  else
    return -1;

  switch (count)
    {
      /* Note: code falls through cases! */
      case 6: r[5] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x4000000;
      case 5: r[4] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x200000;
      case 4: r[3] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x10000;
      case 3: r[2] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x800;
      case 2: r[1] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0xc0;
      case 1: r[0] = wc;
    }

  return count;
}


Modified:
/* Stores the UTF-8 representation of the Unicode character wc in r[0..3].
   Returns the number of bytes stored, or -1 if wc is out of range.  */
static int
utf8_wctomb (unsigned char *r, unsigned int wc)
{
  int count;

  if (wc < 0x80)
    count = 1;
  else if (wc < 0x800)
    count = 2;
  else if (wc < 0x10000)
    count = 3;
  else if (wc <= 0x10ffff) /* maximum codepoint U+10FFFF */
    count = 4;
  else
    return -1;

  switch (count)
    {
      /* Note: code falls through cases! */
      case 4: r[3] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x10000;
      case 3: r[2] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x800;
      case 2: r[1] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0xc0;
      case 1: r[0] = wc;
    }

  return count;
}


As Microsoft seems to use surrogates, known as UTF16, in newer versions, 
these routings would produce so called CESU-8 instead of valid UTF-8. 
Perhaps we have to fix this.
Jeff Layton April 13, 2009, 11:39 a.m. UTC | #2
On Mon, 13 Apr 2009 15:13:16 +0530
Suresh Jayaraman <sjayaraman@suse.de> wrote:

> Thanks for the excellent review comments. Have incorporated all of them
> except the concern that whether "src" is null terminated or not. I think
> it is not required as cifs_strfromUCS_le() takes care of the boundary
> check. Please review the updated patch below.
> 
> Add helper to simplify unicode(UCS/UTF-16) to NLS conversion. The helper
> function calculates the memory needed exactly instead of using size
> assumptions and consolidates some common code in there.
> 
> Signed-off-by: Suresh Jayaraman <sjayaraman@suse.de>
> ---
> 
>  fs/cifs/cifs_unicode.h |   28 ++++++++++++++++++++
>  fs/cifs/cifsproto.h    |    2 +
>  fs/cifs/cifssmb.c      |   31 ++++++++++++++++++++++
>  fs/cifs/connect.c      |   67 +++++++++++++++++++----------------------------
>  fs/cifs/sess.c         |   46 +++++++++++++++-----------------
>  5 files changed, 110 insertions(+), 64 deletions(-)
> 
> diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
> index 14eb9a2..06a267b 100644
> --- a/fs/cifs/cifs_unicode.h
> +++ b/fs/cifs/cifs_unicode.h
> @@ -159,6 +159,34 @@ UniStrnlen(const wchar_t *ucs1, int maxlen)
>  }
>  
>  /*
> + * UniStrnlenBytes: Return the length of a NLS string in bytes. Also, populates
> + * 'nchars' with the length of string in 16 bit Unicode chars.
> + */
> +static inline size_t
> +UniStrnlenBytes(const wchar_t *str, int maxlen, int *nchars,
> +		const struct nls_table *codepage)
> +{
> +	int nc;
> +	size_t i = 0, nbytes = 0;
> +	wchar_t uni = *str;
> +	char buf[NLS_MAX_CHARSET_SIZE]; /* enough for one char at a time */
> +
> +	while (*str++ && maxlen) {
> +		nc = codepage->uni2char(uni, buf, NLS_MAX_CHARSET_SIZE);
> +		if (nc > 0)
> +			nbytes += nc;
> +		else
> +			nbytes += 1; /* for '?' */
> +		i++;
> +		if (i >= maxlen)
> +			break;
> +	}
> +	*nchars = i;

Peter has a good point that we can get nchars by dividing the input
string size by 2. It might mean a few less operations but I doubt it
will be measurable.

> +
> +	return nbytes;
> +}
> +
> +/*
>   * UniStrncat:  Concatenate length limited string
>   */
>  static inline wchar_t *
> diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
> index 4167716..d0861d7 100644
> --- a/fs/cifs/cifsproto.h
> +++ b/fs/cifs/cifsproto.h
> @@ -383,4 +383,6 @@ extern int CIFSSMBSetPosixACL(const int xid, struct cifsTconInfo *tcon,
>  		const struct nls_table *nls_codepage, int remap_special_chars);
>  extern int CIFSGetExtAttr(const int xid, struct cifsTconInfo *tcon,
>  			const int netfid, __u64 *pExtAttrBits, __u64 *pMask);
> +extern int cifs_ucs_to_nls(char **dst, const char *src, const int maxlen,
> +			   int *plen, const struct nls_table *nls_codepage);
>  #endif			/* _CIFSPROTO_H */
> diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
> index bc09c99..ca99d35 100644
> --- a/fs/cifs/cifssmb.c
> +++ b/fs/cifs/cifssmb.c
> @@ -81,6 +81,37 @@ static struct {
>  #endif /* CONFIG_CIFS_WEAK_PW_HASH */
>  #endif /* CIFS_POSIX */
>  
> +
> +/*
> + * Calculates, allocates memory and converts to a NLS string.
> + * Note: caller is responsible for freeing dst if function returned 0.
> + * returns:
> + * 	on success - 0
> + * 	on failure - errno
> + */
> +int
> +cifs_ucs_to_nls(char **dst, const char *src, const int maxlen, int *plen,
> +		       const struct nls_table *nls_codepage)
> +{
> +	size_t nbytes;
> +
> +	nbytes = UniStrnlenBytes((wchar_t *)src, maxlen, plen, nls_codepage);
> +	*dst = kzalloc(nbytes + 2, GFP_KERNEL);
> +	if (!*dst)
> +		goto err_exit;
> +
> +	cifs_strfromUCS_le(*dst, (__le16 *)src, *plen, nls_codepage);
> +	/* Assumes NLS string has always 1 byte NULL terminator
> +	 * BB Need to be fixed otherwise */
> +	(*dst)[*plen] = 0;
> +
> +	return 0;
> +
> +err_exit:
> +	cERROR(1, ("Failed to allocate buffer for string\n"));
> +	return -ENOMEM;
> +}
> +
>  /* Allocates buffer into dst and copies smb string from src to it.
>   * caller is responsible for freeing dst if function returned 0.
>   * returns:
> diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
> index 0de3b56..17a6ef4 100644
> --- a/fs/cifs/connect.c
> +++ b/fs/cifs/connect.c
> @@ -2667,39 +2667,29 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
>  					remaining_words =
>  						BCC(smb_buffer_response) / 2;
>  				}
> -				len =
> -				    UniStrnlen((wchar_t *) bcc_ptr,
> -					       remaining_words - 1);
> -/* We look for obvious messed up bcc or strings in response so we do not go off
> -   the end since (at least) WIN2K and Windows XP have a major bug in not null
> -   terminating last Unicode string in response  */
> -				if (ses->serverOS)
> -					kfree(ses->serverOS);
> -				ses->serverOS = kzalloc(2 * (len + 1),
> -							GFP_KERNEL);
> -				if (ses->serverOS == NULL)
> +				/* Win2K and Windows XP seem to have a major bug
> +				 * in not null terminating last Unicode string
> +				 * in response */
> +				kfree(ses->serverOS);
> +				rc = cifs_ucs_to_nls(&(ses->serverOS), bcc_ptr,
> +						remaining_words - 1, &len,
> +						nls_codepage);
> +				if (rc)
>  					goto sesssetup_nomem;
> -				cifs_strfromUCS_le(ses->serverOS,
> -						   (__le16 *)bcc_ptr,
> -						   len, nls_codepage);
>  				bcc_ptr += 2 * (len + 1);
>  				remaining_words -= len + 1;
> -				ses->serverOS[2 * len] = 0;
> -				ses->serverOS[1 + (2 * len)] = 0;
>  				if (remaining_words > 0) {
> -					len = UniStrnlen((wchar_t *)bcc_ptr,
> -							 remaining_words-1);
> +					/* Win2K and Windows XP seem to have a
> +					 * major bug in not null terminating
> +					 * last Unicode string in response */
>  					kfree(ses->serverNOS);
> -					ses->serverNOS = kzalloc(2 * (len + 1),
> -								 GFP_KERNEL);
> -					if (ses->serverNOS == NULL)
> +					rc = cifs_ucs_to_nls(&(ses->serverNOS),
> +							bcc_ptr,
> +							remaining_words - 1,
> +							&len, nls_codepage);
> +					if (rc)
>  						goto sesssetup_nomem;
> -					cifs_strfromUCS_le(ses->serverNOS,
> -							   (__le16 *)bcc_ptr,
> -							   len, nls_codepage);
>  					bcc_ptr += 2 * (len + 1);
> -					ses->serverNOS[2 * len] = 0;
> -					ses->serverNOS[1 + (2 * len)] = 0;
>  					if (strncmp(ses->serverNOS,
>  						"NT LAN Manager 4", 16) == 0) {
>  						cFYI(1, ("NT4 server"));
> @@ -2707,22 +2697,19 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
>  					}
>  					remaining_words -= len + 1;
>  					if (remaining_words > 0) {
> -						len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
> -				/* last string is not always null terminated
> -				   (for e.g. for Windows XP & 2000) */
> -						if (ses->serverDomain)
> -							kfree(ses->serverDomain);
> -						ses->serverDomain =
> -						    kzalloc(2*(len+1),
> -							    GFP_KERNEL);
> -						if (ses->serverDomain == NULL)
> +						/* Win2K and Windows XP seem to
> +						 * have a major bug in not null
> +						 * terminating last Unicode
> +						 * string in response */
> +						kfree(ses->serverDomain);
> +						rc = cifs_ucs_to_nls(
> +							&(ses->serverDomain),
> +							bcc_ptr,
> +							remaining_words, &len,
> +							nls_codepage);
> +						if (rc)
>  							goto sesssetup_nomem;
> -						cifs_strfromUCS_le(ses->serverDomain,
> -							(__le16 *)bcc_ptr,
> -							len, nls_codepage);
>  						bcc_ptr += 2 * (len + 1);
> -						ses->serverDomain[2*len] = 0;
> -						ses->serverDomain[1+(2*len)] = 0;
>  					} else { /* else no more room so create
>  						  dummy domain string */
>  						if (ses->serverDomain)
> diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
> index 5c68b42..1c50063 100644
> --- a/fs/cifs/sess.c
> +++ b/fs/cifs/sess.c
> @@ -301,33 +301,32 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft,
>  	words_left = bleft / 2;
>  
>  	/* save off server operating system */
> -	len = UniStrnlen((wchar_t *) data, words_left);
>  
> -/* We look for obvious messed up bcc or strings in response so we do not go off
> -   the end since (at least) WIN2K and Windows XP have a major bug in not null
> -   terminating last Unicode string in response  */
> +	/* Win2K and Windows XP seem to have a major bug in not null terminating
> +	 * last unicode string in response */
> +	kfree(ses->serverOS);
> +	rc = cifs_ucs_to_nls(&(ses->serverOS), data, words_left, &len, nls_cp);
> +	if (rc)
> +		return rc;
> +
>  	if (len >= words_left)
>  		return rc;
>  

So will this code make it so that we work around this bug in win2k and
XP? Or will it return an error when it gets one of these improperly
terminated packets?

It seems like if that's a danger then before doing the string
coversion, maybe we should detect when the response isn't properly NULL
terminated and fix up the packet so that it is. Then we could just run
it through the helper routines above without having to these extra
checks. Or maybe I'm misunderstanding the problem?


> -	kfree(ses->serverOS);
> -	/* UTF-8 string will not grow more than four times as big as UCS-16 */
> -	ses->serverOS = kzalloc((4 * len) + 2 /* trailing null */, GFP_KERNEL);
> -	if (ses->serverOS != NULL)
> -		cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len, nls_cp);
>  	data += 2 * (len + 1);
>  	words_left -= len + 1;
>  
>  	/* save off server network operating system */
> -	len = UniStrnlen((wchar_t *) data, words_left);
>  
> -	if (len >= words_left)
> +	/* Win2K and Windows XP seem to have a major bug in not null terminating
> +	 * last unicode string in response */
> +	kfree(ses->serverNOS);
> +	rc = cifs_ucs_to_nls(&(ses->serverNOS), data, words_left, &len, nls_cp);
> +	if (rc)
>  		return rc;
>  
> -	kfree(ses->serverNOS);
> -	ses->serverNOS = kzalloc((4 * len) + 2 /* trailing null */, GFP_KERNEL);
> +	if (len >= words_left)
> +		return rc;
>  	if (ses->serverNOS != NULL) {
> -		cifs_strfromUCS_le(ses->serverNOS, (__le16 *)data, len,
> -				   nls_cp);
>  		if (strncmp(ses->serverNOS, "NT LAN Manager 4", 16) == 0) {
>  			cFYI(1, ("NT4 server"));
>  			ses->flags |= CIFS_SES_NT4;
> @@ -337,19 +336,18 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft,
>  	words_left -= len + 1;
>  
>  	/* save off server domain */
> -	len = UniStrnlen((wchar_t *) data, words_left);
> +
> +	/* Win2K and Windows XP seem to have a major bug in not null terminating
> +	 * last unicode string in response */
> +	kfree(ses->serverDomain);
> +	rc = cifs_ucs_to_nls(&(ses->serverDomain), data, words_left, &len,
> +			     nls_cp);
> +	if (rc)
> +		return rc;
>  
>  	if (len > words_left)
>  		return rc;
>  
> -	kfree(ses->serverDomain);
> -	ses->serverDomain = kzalloc(2 * (len + 1), GFP_KERNEL); /* BB FIXME wrong length */
> -	if (ses->serverDomain != NULL) {
> -		cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len,
> -				   nls_cp);
> -		ses->serverDomain[2*len] = 0;
> -		ses->serverDomain[(2*len) + 1] = 0;
> -	}
>  	data += 2 * (len + 1);
>  	words_left -= len + 1;
>
Peter Hudec April 13, 2009, 12:15 p.m. UTC | #3
Jeff Layton schrieb:
>>  /*
>> + * UniStrnlenBytes: Return the length of a NLS string in bytes. Also, populates
>> + * 'nchars' with the length of string in 16 bit Unicode chars.
>> + */
>> +static inline size_t
>> +UniStrnlenBytes(const wchar_t *str, int maxlen, int *nchars,
>> +		const struct nls_table *codepage)
>> +{
>> +	int nc;
>> +	size_t i = 0, nbytes = 0;
>> +	wchar_t uni = *str;
>> +	char buf[NLS_MAX_CHARSET_SIZE]; /* enough for one char at a time */
>> +
>> +	while (*str++ && maxlen) {
>> +		nc = codepage->uni2char(uni, buf, NLS_MAX_CHARSET_SIZE);
>> +		if (nc > 0)
>> +			nbytes += nc;
>> +		else
>> +			nbytes += 1; /* for '?' */
>> +		i++;
>> +		if (i >= maxlen)
>> +			break;
>> +	}
>> +	*nchars = i;
>>     
>
> Peter has a good point that we can get nchars by dividing the input
> string size by 2. It might mean a few less operations but I doubt it
> will be measurable.
Well, I think I got confused myself, because nbytes is not the input 
size, but the output size.
We actually do need the number of iterations of the while statement, but 
in my eyes we can do it directly.
(*nchars)++ in the loop should do it.


Peter
Suresh Jayaraman April 13, 2009, 12:59 p.m. UTC | #4
Jeff Layton wrote:
> On Mon, 13 Apr 2009 15:13:16 +0530
> Suresh Jayaraman <sjayaraman@suse.de> wrote:

>> Add helper to simplify unicode(UCS/UTF-16) to NLS conversion. The helper
>> function calculates the memory needed exactly instead of using size
>> assumptions and consolidates some common code in there.
>>
>> Signed-off-by: Suresh Jayaraman <sjayaraman@suse.de>
>> ---
>>
>>  fs/cifs/cifs_unicode.h |   28 ++++++++++++++++++++
>>  fs/cifs/cifsproto.h    |    2 +
>>  fs/cifs/cifssmb.c      |   31 ++++++++++++++++++++++
>>  fs/cifs/connect.c      |   67 +++++++++++++++++++----------------------------
>>  fs/cifs/sess.c         |   46 +++++++++++++++-----------------
>>  5 files changed, 110 insertions(+), 64 deletions(-)
>>
>> diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
>> index 14eb9a2..06a267b 100644
>> --- a/fs/cifs/cifs_unicode.h
>> +++ b/fs/cifs/cifs_unicode.h
>> @@ -159,6 +159,34 @@ UniStrnlen(const wchar_t *ucs1, int maxlen)
>>  }
>>  
>>  /*
>> + * UniStrnlenBytes: Return the length of a NLS string in bytes. Also, populates
>> + * 'nchars' with the length of string in 16 bit Unicode chars.
>> + */
>> +static inline size_t
>> +UniStrnlenBytes(const wchar_t *str, int maxlen, int *nchars,
>> +		const struct nls_table *codepage)
>> +{
>> +	int nc;
>> +	size_t i = 0, nbytes = 0;
>> +	wchar_t uni = *str;
>> +	char buf[NLS_MAX_CHARSET_SIZE]; /* enough for one char at a time */
>> +
>> +	while (*str++ && maxlen) {
>> +		nc = codepage->uni2char(uni, buf, NLS_MAX_CHARSET_SIZE);
>> +		if (nc > 0)
>> +			nbytes += nc;
>> +		else
>> +			nbytes += 1; /* for '?' */
>> +		i++;
>> +		if (i >= maxlen)
>> +			break;
>> +	}
>> +	*nchars = i;
> 
> Peter has a good point that we can get nchars by dividing the input
> string size by 2. It might mean a few less operations but I doubt it
> will be measurable.

Did you mean output nbytes by 2?

As Peter pointed out, unneeded stack var uni can be removed. Other than
that, do you see any other changes/improvements?

>> diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
>> index 5c68b42..1c50063 100644
>> --- a/fs/cifs/sess.c
>> +++ b/fs/cifs/sess.c
>> @@ -301,33 +301,32 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft,
>>  	words_left = bleft / 2;
>>  
>>  	/* save off server operating system */
>> -	len = UniStrnlen((wchar_t *) data, words_left);
>>  
>> -/* We look for obvious messed up bcc or strings in response so we do not go off
>> -   the end since (at least) WIN2K and Windows XP have a major bug in not null
>> -   terminating last Unicode string in response  */
>> +	/* Win2K and Windows XP seem to have a major bug in not null terminating
>> +	 * last unicode string in response */
>> +	kfree(ses->serverOS);
>> +	rc = cifs_ucs_to_nls(&(ses->serverOS), data, words_left, &len, nls_cp);
>> +	if (rc)
>> +		return rc;
>> +
>>  	if (len >= words_left)
>>  		return rc;
>>  
> 
> So will this code make it so that we work around this bug in win2k and
> XP? Or will it return an error when it gets one of these improperly
> terminated packets?

I think so, perhaps Steve is the right person to ask, Steve?

> It seems like if that's a danger then before doing the string
> coversion, maybe we should detect when the response isn't properly NULL
> terminated and fix up the packet so that it is. Then we could just run
> it through the helper routines above without having to these extra
> checks. Or maybe I'm misunderstanding the problem?

My comment was based on the earlier comment and I have not seen
experienced this issue. I also need to understand this better.

Thanks,

Patch
diff mbox

diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 14eb9a2..06a267b 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -159,6 +159,34 @@  UniStrnlen(const wchar_t *ucs1, int maxlen)
 }
 
 /*
+ * UniStrnlenBytes: Return the length of a NLS string in bytes. Also, populates
+ * 'nchars' with the length of string in 16 bit Unicode chars.
+ */
+static inline size_t
+UniStrnlenBytes(const wchar_t *str, int maxlen, int *nchars,
+		const struct nls_table *codepage)
+{
+	int nc;
+	size_t i = 0, nbytes = 0;
+	wchar_t uni = *str;
+	char buf[NLS_MAX_CHARSET_SIZE]; /* enough for one char at a time */
+
+	while (*str++ && maxlen) {
+		nc = codepage->uni2char(uni, buf, NLS_MAX_CHARSET_SIZE);
+		if (nc > 0)
+			nbytes += nc;
+		else
+			nbytes += 1; /* for '?' */
+		i++;
+		if (i >= maxlen)
+			break;
+	}
+	*nchars = i;
+
+	return nbytes;
+}
+
+/*
  * UniStrncat:  Concatenate length limited string
  */
 static inline wchar_t *
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 4167716..d0861d7 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -383,4 +383,6 @@  extern int CIFSSMBSetPosixACL(const int xid, struct cifsTconInfo *tcon,
 		const struct nls_table *nls_codepage, int remap_special_chars);
 extern int CIFSGetExtAttr(const int xid, struct cifsTconInfo *tcon,
 			const int netfid, __u64 *pExtAttrBits, __u64 *pMask);
+extern int cifs_ucs_to_nls(char **dst, const char *src, const int maxlen,
+			   int *plen, const struct nls_table *nls_codepage);
 #endif			/* _CIFSPROTO_H */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index bc09c99..ca99d35 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -81,6 +81,37 @@  static struct {
 #endif /* CONFIG_CIFS_WEAK_PW_HASH */
 #endif /* CIFS_POSIX */
 
+
+/*
+ * Calculates, allocates memory and converts to a NLS string.
+ * Note: caller is responsible for freeing dst if function returned 0.
+ * returns:
+ * 	on success - 0
+ * 	on failure - errno
+ */
+int
+cifs_ucs_to_nls(char **dst, const char *src, const int maxlen, int *plen,
+		       const struct nls_table *nls_codepage)
+{
+	size_t nbytes;
+
+	nbytes = UniStrnlenBytes((wchar_t *)src, maxlen, plen, nls_codepage);
+	*dst = kzalloc(nbytes + 2, GFP_KERNEL);
+	if (!*dst)
+		goto err_exit;
+
+	cifs_strfromUCS_le(*dst, (__le16 *)src, *plen, nls_codepage);
+	/* Assumes NLS string has always 1 byte NULL terminator
+	 * BB Need to be fixed otherwise */
+	(*dst)[*plen] = 0;
+
+	return 0;
+
+err_exit:
+	cERROR(1, ("Failed to allocate buffer for string\n"));
+	return -ENOMEM;
+}
+
 /* Allocates buffer into dst and copies smb string from src to it.
  * caller is responsible for freeing dst if function returned 0.
  * returns:
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0de3b56..17a6ef4 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2667,39 +2667,29 @@  CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 					remaining_words =
 						BCC(smb_buffer_response) / 2;
 				}
-				len =
-				    UniStrnlen((wchar_t *) bcc_ptr,
-					       remaining_words - 1);
-/* We look for obvious messed up bcc or strings in response so we do not go off
-   the end since (at least) WIN2K and Windows XP have a major bug in not null
-   terminating last Unicode string in response  */
-				if (ses->serverOS)
-					kfree(ses->serverOS);
-				ses->serverOS = kzalloc(2 * (len + 1),
-							GFP_KERNEL);
-				if (ses->serverOS == NULL)
+				/* Win2K and Windows XP seem to have a major bug
+				 * in not null terminating last Unicode string
+				 * in response */
+				kfree(ses->serverOS);
+				rc = cifs_ucs_to_nls(&(ses->serverOS), bcc_ptr,
+						remaining_words - 1, &len,
+						nls_codepage);
+				if (rc)
 					goto sesssetup_nomem;
-				cifs_strfromUCS_le(ses->serverOS,
-						   (__le16 *)bcc_ptr,
-						   len, nls_codepage);
 				bcc_ptr += 2 * (len + 1);
 				remaining_words -= len + 1;
-				ses->serverOS[2 * len] = 0;
-				ses->serverOS[1 + (2 * len)] = 0;
 				if (remaining_words > 0) {
-					len = UniStrnlen((wchar_t *)bcc_ptr,
-							 remaining_words-1);
+					/* Win2K and Windows XP seem to have a
+					 * major bug in not null terminating
+					 * last Unicode string in response */
 					kfree(ses->serverNOS);
-					ses->serverNOS = kzalloc(2 * (len + 1),
-								 GFP_KERNEL);
-					if (ses->serverNOS == NULL)
+					rc = cifs_ucs_to_nls(&(ses->serverNOS),
+							bcc_ptr,
+							remaining_words - 1,
+							&len, nls_codepage);
+					if (rc)
 						goto sesssetup_nomem;
-					cifs_strfromUCS_le(ses->serverNOS,
-							   (__le16 *)bcc_ptr,
-							   len, nls_codepage);
 					bcc_ptr += 2 * (len + 1);
-					ses->serverNOS[2 * len] = 0;
-					ses->serverNOS[1 + (2 * len)] = 0;
 					if (strncmp(ses->serverNOS,
 						"NT LAN Manager 4", 16) == 0) {
 						cFYI(1, ("NT4 server"));
@@ -2707,22 +2697,19 @@  CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 					}
 					remaining_words -= len + 1;
 					if (remaining_words > 0) {
-						len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
-				/* last string is not always null terminated
-				   (for e.g. for Windows XP & 2000) */
-						if (ses->serverDomain)
-							kfree(ses->serverDomain);
-						ses->serverDomain =
-						    kzalloc(2*(len+1),
-							    GFP_KERNEL);
-						if (ses->serverDomain == NULL)
+						/* Win2K and Windows XP seem to
+						 * have a major bug in not null
+						 * terminating last Unicode
+						 * string in response */
+						kfree(ses->serverDomain);
+						rc = cifs_ucs_to_nls(
+							&(ses->serverDomain),
+							bcc_ptr,
+							remaining_words, &len,
+							nls_codepage);
+						if (rc)
 							goto sesssetup_nomem;
-						cifs_strfromUCS_le(ses->serverDomain,
-							(__le16 *)bcc_ptr,
-							len, nls_codepage);
 						bcc_ptr += 2 * (len + 1);
-						ses->serverDomain[2*len] = 0;
-						ses->serverDomain[1+(2*len)] = 0;
 					} else { /* else no more room so create
 						  dummy domain string */
 						if (ses->serverDomain)
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 5c68b42..1c50063 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -301,33 +301,32 @@  static int decode_unicode_ssetup(char **pbcc_area, int bleft,
 	words_left = bleft / 2;
 
 	/* save off server operating system */
-	len = UniStrnlen((wchar_t *) data, words_left);
 
-/* We look for obvious messed up bcc or strings in response so we do not go off
-   the end since (at least) WIN2K and Windows XP have a major bug in not null
-   terminating last Unicode string in response  */
+	/* Win2K and Windows XP seem to have a major bug in not null terminating
+	 * last unicode string in response */
+	kfree(ses->serverOS);
+	rc = cifs_ucs_to_nls(&(ses->serverOS), data, words_left, &len, nls_cp);
+	if (rc)
+		return rc;
+
 	if (len >= words_left)
 		return rc;
 
-	kfree(ses->serverOS);
-	/* UTF-8 string will not grow more than four times as big as UCS-16 */
-	ses->serverOS = kzalloc((4 * len) + 2 /* trailing null */, GFP_KERNEL);
-	if (ses->serverOS != NULL)
-		cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len, nls_cp);
 	data += 2 * (len + 1);
 	words_left -= len + 1;
 
 	/* save off server network operating system */
-	len = UniStrnlen((wchar_t *) data, words_left);
 
-	if (len >= words_left)
+	/* Win2K and Windows XP seem to have a major bug in not null terminating
+	 * last unicode string in response */
+	kfree(ses->serverNOS);
+	rc = cifs_ucs_to_nls(&(ses->serverNOS), data, words_left, &len, nls_cp);
+	if (rc)
 		return rc;
 
-	kfree(ses->serverNOS);
-	ses->serverNOS = kzalloc((4 * len) + 2 /* trailing null */, GFP_KERNEL);
+	if (len >= words_left)
+		return rc;
 	if (ses->serverNOS != NULL) {
-		cifs_strfromUCS_le(ses->serverNOS, (__le16 *)data, len,
-				   nls_cp);
 		if (strncmp(ses->serverNOS, "NT LAN Manager 4", 16) == 0) {
 			cFYI(1, ("NT4 server"));
 			ses->flags |= CIFS_SES_NT4;
@@ -337,19 +336,18 @@  static int decode_unicode_ssetup(char **pbcc_area, int bleft,
 	words_left -= len + 1;
 
 	/* save off server domain */
-	len = UniStrnlen((wchar_t *) data, words_left);
+
+	/* Win2K and Windows XP seem to have a major bug in not null terminating
+	 * last unicode string in response */
+	kfree(ses->serverDomain);
+	rc = cifs_ucs_to_nls(&(ses->serverDomain), data, words_left, &len,
+			     nls_cp);
+	if (rc)
+		return rc;
 
 	if (len > words_left)
 		return rc;
 
-	kfree(ses->serverDomain);
-	ses->serverDomain = kzalloc(2 * (len + 1), GFP_KERNEL); /* BB FIXME wrong length */
-	if (ses->serverDomain != NULL) {
-		cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len,
-				   nls_cp);
-		ses->serverDomain[2*len] = 0;
-		ses->serverDomain[(2*len) + 1] = 0;
-	}
 	data += 2 * (len + 1);
 	words_left -= len + 1;