diff mbox series

[PATCHv3] gcc: disable '-Wstrignop-overread' universally for gcc-13+ and FORTIFY_SOURCE

Message ID 20241208161315.730138-1-nilay@linux.ibm.com (mailing list archive)
State Not Applicable
Delegated to: Herbert Xu
Headers show
Series [PATCHv3] gcc: disable '-Wstrignop-overread' universally for gcc-13+ and FORTIFY_SOURCE | expand

Commit Message

Nilay Shroff Dec. 8, 2024, 4:12 p.m. UTC
While building the powerpc code using gcc 13.x, I came across following
errors generated for kernel/padata.c file:

  CC      kernel/padata.o
In file included from ./include/linux/string.h:390,
                 from ./arch/powerpc/include/asm/paca.h:16,
                 from ./arch/powerpc/include/asm/current.h:13,
                 from ./include/linux/thread_info.h:23,
                 from ./include/asm-generic/preempt.h:5,
                 from ./arch/powerpc/include/generated/asm/preempt.h:1,
                 from ./include/linux/preempt.h:79,
                 from ./include/linux/spinlock.h:56,
                 from ./include/linux/swait.h:7,
                 from ./include/linux/completion.h:12,
                 from kernel/padata.c:14:
In function ‘bitmap_copy’,
    inlined from ‘cpumask_copy’ at ./include/linux/cpumask.h:839:2,
    inlined from ‘__padata_set_cpumasks’ at kernel/padata.c:730:2:
./include/linux/fortify-string.h:114:33: error: ‘__builtin_memcpy’ reading between 257 and 536870904 bytes from a region of size 256 [-Werror=stringop-overread]
  114 | #define __underlying_memcpy     __builtin_memcpy
      |                                 ^
./include/linux/fortify-string.h:633:9: note: in expansion of macro ‘__underlying_memcpy’
  633 |         __underlying_##op(p, q, __fortify_size);                        \
      |         ^~~~~~~~~~~~~
./include/linux/fortify-string.h:678:26: note: in expansion of macro ‘__fortify_memcpy_chk’
  678 | #define memcpy(p, q, s)  __fortify_memcpy_chk(p, q, s,                  \
      |                          ^~~~~~~~~~~~~~~~~~~~
./include/linux/bitmap.h:259:17: note: in expansion of macro ‘memcpy’
  259 |                 memcpy(dst, src, len);
      |                 ^~~~~~
kernel/padata.c: In function ‘__padata_set_cpumasks’:
kernel/padata.c:713:48: note: source object ‘pcpumask’ of size [0, 256]
  713 |                                  cpumask_var_t pcpumask,
      |                                  ~~~~~~~~~~~~~~^~~~~~~~
In function ‘bitmap_copy’,
    inlined from ‘cpumask_copy’ at ./include/linux/cpumask.h:839:2,
    inlined from ‘__padata_set_cpumasks’ at kernel/padata.c:730:2:
./include/linux/fortify-string.h:114:33: error: ‘__builtin_memcpy’ reading between 257 and 536870904 bytes from a region of size 256 [-Werror=stringop-overread]
  114 | #define __underlying_memcpy     __builtin_memcpy
      |                                 ^
./include/linux/fortify-string.h:633:9: note: in expansion of macro ‘__underlying_memcpy’
  633 |         __underlying_##op(p, q, __fortify_size);                        \
      |         ^~~~~~~~~~~~~
./include/linux/fortify-string.h:678:26: note: in expansion of macro ‘__fortify_memcpy_chk’
  678 | #define memcpy(p, q, s)  __fortify_memcpy_chk(p, q, s,                  \
      |                          ^~~~~~~~~~~~~~~~~~~~
./include/linux/bitmap.h:259:17: note: in expansion of macro ‘memcpy’
  259 |                 memcpy(dst, src, len);
      |                 ^~~~~~
kernel/padata.c: In function ‘__padata_set_cpumasks’:
kernel/padata.c:713:48: note: source object ‘pcpumask’ of size [0, 256]
  713 |                                  cpumask_var_t pcpumask,
      |                                  ~~~~~~~~~~~~~~^~~~~~~~

The above error appears to be false-positive:
From the distro relevant config,
    CONFIG_PPC64=y
    CONFIG_CC_IS_GCC=y
    CONFIG_GCC_VERSION=130301
    CONFIG_NR_CPUS=2048
    CONFIG_FORTIFY_SOURCE=y

From the source code,
unsigned long name[BITS_TO_LONGS(bits)]
...
typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
typedef struct cpumask cpumask_var_t[1];
...

extern unsigned int nr_cpu_ids;
...
static __always_inline
void bitmap_copy(unsigned long *dst, const unsigned long *src, unsigned
int nbits)
{
        unsigned int len = bitmap_size(nbits);

        if (small_const_nbits(nbits))
                *dst = *src;
        else
                memcpy(dst, src, len);
}

static __always_inline
void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp)
{
        bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), large_cpumask_bits);
}
...
static int __padata_set_cpumasks(struct padata_instance *pinst,
                                 cpumask_var_t pcpumask,
                                 cpumask_var_t cbcpumask)
{
...
        cpumask_copy(pinst->cpumask.pcpu, pcpumask);
        cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
...
}

So the above statements expands to:
memcpy(pinst->cpumask.pcpu->bits, pcpumask->bits, nr_cpu_ids)
memcpy(pinst->cpumask.cbcpu->bits, cbcpumask->bits, nr_cpu_ids)

Now the compiler complains about "error: ‘__builtin_memcpy’ reading
between 257 and 536870904 bytes from a region of size 256". So the
value of nr_cpu_ids which gcc calculated is between 257 and 536870904.
This looks strange and incorrect.

Later similar errors[1] were also observed on x86-64 platform using 
gcc-14. Apparently, above errors menifests using gcc-13+ and config 
option CONFIG_FORTIFY_SOURCE=y. Moreover, I don't encounter above 
error when,
- using gcc 11.x or gcc 12.x or LLVM/Clang compiler
or
- disabling CONFIG_FORTIFY_SOURCE option

So for now, silence above errors globally while using gcc-13+ and
CONFIG_FORTIFY_SOURCE=y. We may later revert this change when we
find root cause of the error. Per this  discussion[2], GCC developers
are working to add extra diagnostics and context when this error
menifests and that might help to find the root cause.

[1] https://lore.kernel.org/all/2024120757-lustrous-equinox-77f0@gregkh/
[2] https://gcc.gnu.org/pipermail/gcc-patches/2024-October/666870.html

Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: briannorris@chromium.org
Cc: yury.norov@gmail.com
Cc: kees@kernel.org
Cc: gustavoars@kernel.org
Cc: nathan@kernel.org
Cc: steffen.klassert@secunet.com
Cc: daniel.m.jordan@oracle.com
Cc: gjoyce@ibm.com
Cc: linux-crypto@vger.kernel.org
Cc: linux@weissschuh.net
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
---
Changes from v2:
    - Globally disable -Werror-stringop-overread while using gcc-13+ and
      FORTIFY_SOURCE (Yury Norov)
    - Updated the patch subject line to make it clear that this is
	  possiblt gcc error and not related to cpumask.
Changes from v1:
    - Fix spell error in the commit message (Brian Norris)
    - Add commentary around change to note that changes are needed to
      avoid false positive on gcc 13+ (Brian Norris)
    - Add the kerenl/padata.c file maintainers (Brian Norris)
---
 init/Kconfig               | 8 ++++++++
 scripts/Makefile.extrawarn | 1 +
 2 files changed, 9 insertions(+)

Comments

Yury Norov Dec. 8, 2024, 6:25 p.m. UTC | #1
On Sun, Dec 08, 2024 at 09:42:28PM +0530, Nilay Shroff wrote:
> While building the powerpc code using gcc 13.x, I came across following
> errors generated for kernel/padata.c file:
> 
>   CC      kernel/padata.o
> In file included from ./include/linux/string.h:390,
>                  from ./arch/powerpc/include/asm/paca.h:16,
>                  from ./arch/powerpc/include/asm/current.h:13,
>                  from ./include/linux/thread_info.h:23,
>                  from ./include/asm-generic/preempt.h:5,
>                  from ./arch/powerpc/include/generated/asm/preempt.h:1,
>                  from ./include/linux/preempt.h:79,
>                  from ./include/linux/spinlock.h:56,
>                  from ./include/linux/swait.h:7,
>                  from ./include/linux/completion.h:12,
>                  from kernel/padata.c:14:
> In function ‘bitmap_copy’,
>     inlined from ‘cpumask_copy’ at ./include/linux/cpumask.h:839:2,
>     inlined from ‘__padata_set_cpumasks’ at kernel/padata.c:730:2:
> ./include/linux/fortify-string.h:114:33: error: ‘__builtin_memcpy’ reading between 257 and 536870904 bytes from a region of size 256 [-Werror=stringop-overread]
>   114 | #define __underlying_memcpy     __builtin_memcpy
>       |                                 ^
> ./include/linux/fortify-string.h:633:9: note: in expansion of macro ‘__underlying_memcpy’
>   633 |         __underlying_##op(p, q, __fortify_size);                        \
>       |         ^~~~~~~~~~~~~
> ./include/linux/fortify-string.h:678:26: note: in expansion of macro ‘__fortify_memcpy_chk’
>   678 | #define memcpy(p, q, s)  __fortify_memcpy_chk(p, q, s,                  \
>       |                          ^~~~~~~~~~~~~~~~~~~~
> ./include/linux/bitmap.h:259:17: note: in expansion of macro ‘memcpy’
>   259 |                 memcpy(dst, src, len);
>       |                 ^~~~~~
> kernel/padata.c: In function ‘__padata_set_cpumasks’:
> kernel/padata.c:713:48: note: source object ‘pcpumask’ of size [0, 256]
>   713 |                                  cpumask_var_t pcpumask,
>       |                                  ~~~~~~~~~~~~~~^~~~~~~~
> In function ‘bitmap_copy’,
>     inlined from ‘cpumask_copy’ at ./include/linux/cpumask.h:839:2,
>     inlined from ‘__padata_set_cpumasks’ at kernel/padata.c:730:2:
> ./include/linux/fortify-string.h:114:33: error: ‘__builtin_memcpy’ reading between 257 and 536870904 bytes from a region of size 256 [-Werror=stringop-overread]
>   114 | #define __underlying_memcpy     __builtin_memcpy
>       |                                 ^
> ./include/linux/fortify-string.h:633:9: note: in expansion of macro ‘__underlying_memcpy’
>   633 |         __underlying_##op(p, q, __fortify_size);                        \
>       |         ^~~~~~~~~~~~~
> ./include/linux/fortify-string.h:678:26: note: in expansion of macro ‘__fortify_memcpy_chk’
>   678 | #define memcpy(p, q, s)  __fortify_memcpy_chk(p, q, s,                  \
>       |                          ^~~~~~~~~~~~~~~~~~~~
> ./include/linux/bitmap.h:259:17: note: in expansion of macro ‘memcpy’
>   259 |                 memcpy(dst, src, len);
>       |                 ^~~~~~
> kernel/padata.c: In function ‘__padata_set_cpumasks’:
> kernel/padata.c:713:48: note: source object ‘pcpumask’ of size [0, 256]
>   713 |                                  cpumask_var_t pcpumask,
>       |                                  ~~~~~~~~~~~~~~^~~~~~~~
> 
> The above error appears to be false-positive:
> >From the distro relevant config,
>     CONFIG_PPC64=y
>     CONFIG_CC_IS_GCC=y
>     CONFIG_GCC_VERSION=130301
>     CONFIG_NR_CPUS=2048
>     CONFIG_FORTIFY_SOURCE=y
> 
> >From the source code,
> unsigned long name[BITS_TO_LONGS(bits)]
> ...
> typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
> typedef struct cpumask cpumask_var_t[1];
> ...
> 
> extern unsigned int nr_cpu_ids;
> ...
> static __always_inline
> void bitmap_copy(unsigned long *dst, const unsigned long *src, unsigned
> int nbits)
> {
>         unsigned int len = bitmap_size(nbits);
> 
>         if (small_const_nbits(nbits))
>                 *dst = *src;
>         else
>                 memcpy(dst, src, len);
> }
> 
> static __always_inline
> void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp)
> {
>         bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), large_cpumask_bits);
> }
> ...
> static int __padata_set_cpumasks(struct padata_instance *pinst,
>                                  cpumask_var_t pcpumask,
>                                  cpumask_var_t cbcpumask)
> {
> ...
>         cpumask_copy(pinst->cpumask.pcpu, pcpumask);
>         cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
> ...
> }
> 
> So the above statements expands to:
> memcpy(pinst->cpumask.pcpu->bits, pcpumask->bits, nr_cpu_ids)
> memcpy(pinst->cpumask.cbcpu->bits, cbcpumask->bits, nr_cpu_ids)
> 
> Now the compiler complains about "error: ‘__builtin_memcpy’ reading
> between 257 and 536870904 bytes from a region of size 256". So the
> value of nr_cpu_ids which gcc calculated is between 257 and 536870904.
> This looks strange and incorrect.

Thanks for the detour into internals. I did the same by myself, and
spent quite a lot of my time trying to understand why GCC believes
that here we're trying to access memory beyond idx == 256 and up to
a pretty random 536870904.

256 is most likely NR_CPUS/8, and that makes sense. But I have no ideas
what does this 536870904 mean. OK, it's ((u32)-64)>>3, but to me it's a
random number. I'm quite sure cpumasks machinery can't be involved in
generating it.

Well, we maybe have to spend more time on this, at least try to
understand how exactly CONFIG_FORTIFY_SOURCE is involved. But to me
it's OK to silence the warning this way. Moreover, you're pointing to
undergoing discussions.

> Later similar errors[1] were also observed on x86-64 platform using 
> gcc-14. Apparently, above errors menifests using gcc-13+ and config 
> option CONFIG_FORTIFY_SOURCE=y. Moreover, I don't encounter above 
> error when,
> - using gcc 11.x or gcc 12.x or LLVM/Clang compiler
> or
> - disabling CONFIG_FORTIFY_SOURCE option
> 
> So for now, silence above errors globally while using gcc-13+ and
> CONFIG_FORTIFY_SOURCE=y. We may later revert this change when we
> find root cause of the error. Per this  discussion[2], GCC developers
> are working to add extra diagnostics and context when this error
> menifests and that might help to find the root cause.
> 
> [1] https://lore.kernel.org/all/2024120757-lustrous-equinox-77f0@gregkh/
> [2] https://gcc.gnu.org/pipermail/gcc-patches/2024-October/666870.html
> 
> Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Greg acked v2, which differs from v3 quite a lot. I would check with
him if he's still OK.

> Cc: briannorris@chromium.org
> Cc: yury.norov@gmail.com
> Cc: kees@kernel.org
> Cc: gustavoars@kernel.org
> Cc: nathan@kernel.org
> Cc: steffen.klassert@secunet.com
> Cc: daniel.m.jordan@oracle.com
> Cc: gjoyce@ibm.com
> Cc: linux-crypto@vger.kernel.org
> Cc: linux@weissschuh.net
> Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
> ---
> Changes from v2:
>     - Globally disable -Werror-stringop-overread while using gcc-13+ and
>       FORTIFY_SOURCE (Yury Norov)
>     - Updated the patch subject line to make it clear that this is
> 	  possiblt gcc error and not related to cpumask.
> Changes from v1:
>     - Fix spell error in the commit message (Brian Norris)
>     - Add commentary around change to note that changes are needed to
>       avoid false positive on gcc 13+ (Brian Norris)
>     - Add the kerenl/padata.c file maintainers (Brian Norris)
> ---
>  init/Kconfig               | 8 ++++++++
>  scripts/Makefile.extrawarn | 1 +
>  2 files changed, 9 insertions(+)
> 
> diff --git a/init/Kconfig b/init/Kconfig
> index a20e6efd3f0f..ff2f54520551 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -920,6 +920,14 @@ config CC_STRINGOP_OVERFLOW
>  	bool
>  	default y if CC_IS_GCC && !CC_NO_STRINGOP_OVERFLOW
>  
> +# Currently, disable -Wstringop-overread for gcc-13+ and FORTIFY_SOURCE globally.
> +config GCC13_NO_STRINGOP_OVERREAD
> +	def_bool y

This is an issue for GCC14 as well, as Greg mentioned, and you say the
same in the commit message. I'd name this config GCC_NO_STRINGOP_OVERREAD.

But I don't think we need this extra knob at all. Those who want to
disable CC_NO_STRINGOP_OVERREAD can disable it explicitly.

Anyways, it's minor. For cpumasks:

Acked-by: Yury Norov <yury.norov@gmail.com>

> +
> +config CC_NO_STRINGOP_OVERREAD
> +	bool
> +	default y if CC_IS_GCC && GCC_VERSION >= 130000 && GCC13_NO_STRINGOP_OVERREAD && FORTIFY_SOURCE
> +
>  #
>  # For architectures that know their GCC __int128 support is sound
>  #
> diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
> index 1d13cecc7cc7..1abd41269fd0 100644
> --- a/scripts/Makefile.extrawarn
> +++ b/scripts/Makefile.extrawarn
> @@ -27,6 +27,7 @@ endif
>  KBUILD_CPPFLAGS-$(CONFIG_WERROR) += -Werror
>  KBUILD_CPPFLAGS += $(KBUILD_CPPFLAGS-y)
>  KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds
> +KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERREAD) += -Wno-stringop-overread
>  
>  ifdef CONFIG_CC_IS_CLANG
>  # The kernel builds with '-std=gnu11' so use of GNU extensions is acceptable.
> -- 
> 2.45.2
Greg Kroah-Hartman Dec. 9, 2024, 6:45 a.m. UTC | #2
On Sun, Dec 08, 2024 at 09:42:28PM +0530, Nilay Shroff wrote:
> Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

As this is different, my Ack does not still stand, sorry :(

> +# Currently, disable -Wstringop-overread for gcc-13+ and FORTIFY_SOURCE globally.
> +config GCC13_NO_STRINGOP_OVERREAD
> +	def_bool y

I hit this with gcc 14, it's not just a gcc 13 issue.

> +config CC_NO_STRINGOP_OVERREAD
> +	bool
> +	default y if CC_IS_GCC && GCC_VERSION >= 130000 && GCC13_NO_STRINGOP_OVERREAD && FORTIFY_SOURCE

Ok, I see you enabled this for more than 13, but why call it "13"?

> +
>  #
>  # For architectures that know their GCC __int128 support is sound
>  #
> diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
> index 1d13cecc7cc7..1abd41269fd0 100644
> --- a/scripts/Makefile.extrawarn
> +++ b/scripts/Makefile.extrawarn
> @@ -27,6 +27,7 @@ endif
>  KBUILD_CPPFLAGS-$(CONFIG_WERROR) += -Werror
>  KBUILD_CPPFLAGS += $(KBUILD_CPPFLAGS-y)
>  KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds
> +KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERREAD) += -Wno-stringop-overread

I don't want this disabled for all files in the kernel, we only have one
that this is a problem for.  I think you disable this, the whole fortify
logic is disabled which is not the goal, why not just force the fortify
feature OFF if we have a "bad compiler" that can not support it?

So no, I don't think this is the correct solution here, sorry.

And it's odd that we are the only 2 people hitting it, has everyone else
just given up on gcc and moved on to using clang?

thanks,

greg k-h
Nilay Shroff Dec. 9, 2024, 5:09 p.m. UTC | #3
On 12/9/24 12:15, Greg Kroah-Hartman wrote:
> On Sun, Dec 08, 2024 at 09:42:28PM +0530, Nilay Shroff wrote:
>> Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> 
> As this is different, my Ack does not still stand, sorry :(
> 
>> +# Currently, disable -Wstringop-overread for gcc-13+ and FORTIFY_SOURCE globally.
>> +config GCC13_NO_STRINGOP_OVERREAD
>> +	def_bool y
> 
> I hit this with gcc 14, it's not just a gcc 13 issue.
> 
>> +config CC_NO_STRINGOP_OVERREAD
>> +	bool
>> +	default y if CC_IS_GCC && GCC_VERSION >= 130000 && GCC13_NO_STRINGOP_OVERREAD && FORTIFY_SOURCE
> 
> Ok, I see you enabled this for more than 13, but why call it "13"?
Yeah I'd change it to GCC_NO_STRINGOP_OVERREAD.
> 
>> +
>>  #
>>  # For architectures that know their GCC __int128 support is sound
>>  #
>> diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
>> index 1d13cecc7cc7..1abd41269fd0 100644
>> --- a/scripts/Makefile.extrawarn
>> +++ b/scripts/Makefile.extrawarn
>> @@ -27,6 +27,7 @@ endif
>>  KBUILD_CPPFLAGS-$(CONFIG_WERROR) += -Werror
>>  KBUILD_CPPFLAGS += $(KBUILD_CPPFLAGS-y)
>>  KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds
>> +KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERREAD) += -Wno-stringop-overread
> 
> I don't want this disabled for all files in the kernel, we only have one
> that this is a problem for.  I think you disable this, the whole fortify
> logic is disabled which is not the goal, why not just force the fortify
> feature OFF if we have a "bad compiler" that can not support it?
> 
okay so that means you recommend to disable FORTIFY_SOURCE for gcc-13+ instead 
of disabling -Wstringop-overread globally?

> So no, I don't think this is the correct solution here, sorry.
> 
> And it's odd that we are the only 2 people hitting it, has everyone else
> just given up on gcc and moved on to using clang?
I guess that developers are either using Clang or they haven't enabled CONFIG_FORTIFY_SOURCE 
if they're using gcc-13+.

Thanks,
--Nilay
Nathan Chancellor Dec. 9, 2024, 7:35 p.m. UTC | #4
On Sun, Dec 08, 2024 at 10:25:21AM -0800, Yury Norov wrote:
> On Sun, Dec 08, 2024 at 09:42:28PM +0530, Nilay Shroff wrote:
> > So the above statements expands to:
> > memcpy(pinst->cpumask.pcpu->bits, pcpumask->bits, nr_cpu_ids)
> > memcpy(pinst->cpumask.cbcpu->bits, cbcpumask->bits, nr_cpu_ids)
> > 
> > Now the compiler complains about "error: ‘__builtin_memcpy’ reading
> > between 257 and 536870904 bytes from a region of size 256". So the
> > value of nr_cpu_ids which gcc calculated is between 257 and 536870904.
> > This looks strange and incorrect.
> 
> Thanks for the detour into internals. I did the same by myself, and
> spent quite a lot of my time trying to understand why GCC believes
> that here we're trying to access memory beyond idx == 256 and up to
> a pretty random 536870904.
> 
> 256 is most likely NR_CPUS/8, and that makes sense. But I have no ideas
> what does this 536870904 mean. OK, it's ((u32)-64)>>3, but to me it's a
> random number. I'm quite sure cpumasks machinery can't be involved in
> generating it.

That can also be written as (UINT_MAX - 63) / 8, which I believe matches
the ultimate math of bitmap_size() if nbits is UINT_MAX (but I did not
fully verify) in bitmap_copy(). I tried building this code with the
in-review -fdiagnostics-details option from GCC [1] but it does not
really provide any other insight here. UINT_MAX probably comes from the
fact that for this configuration, large_cpumask_bits is an indeterminate
value for the compiler without link time optimization because it is an
extern in kernel/padata.c:

| #if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS)
| #define nr_cpu_ids ((unsigned int)NR_CPUS)
| #else
| extern unsigned int nr_cpu_ids;
| #endif
| ...
| #if NR_CPUS <= BITS_PER_LONG
|   #define small_cpumask_bits ((unsigned int)NR_CPUS)
|   #define large_cpumask_bits ((unsigned int)NR_CPUS)
| #elif NR_CPUS <= 4*BITS_PER_LONG
|   #define small_cpumask_bits nr_cpu_ids
|   #define large_cpumask_bits ((unsigned int)NR_CPUS)
| #else
|   #define small_cpumask_bits nr_cpu_ids
|   #define large_cpumask_bits nr_cpu_ids
| #endif

From what I can tell, nothing in this callchain asserts to the compiler
that nr_cpu_ids cannot be larger than the compile time value of NR_CPUS
(I assume there is a check for this somewhere?), so it assumes that this
memcpy() can overflow if nr_cpu_ids is larger than NR_CPUS, which is
where that range appears to come from. I am able to kill this warning
with

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 9278a50d514f..a1b0e213c638 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -836,6 +836,7 @@ void cpumask_shift_left(struct cpumask *dstp, const struct cpumask *srcp, int n)
 static __always_inline
 void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp)
 {
+	BUG_ON(large_cpumask_bits > NR_CPUS);
 	bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), large_cpumask_bits);
 }
 

although I am sure that is not going to be acceptable but it might give
a hint about what could be done to deal with this.

Another option would be taking advantage of the __diag infrastructure to
silence this warning around the bitmap_copy() in cpumask_copy(), stating
that we know this can never overflow because of <reason>. I think that
would be much more palpable than disabling the warning globally for the
kernel, much like Greg said.

[1]: https://inbox.sourceware.org/gcc-patches/20241105163132.1922052-1-qing.zhao@oracle.com/

Cheers,
Nathan
Nathan Chancellor Dec. 9, 2024, 8:03 p.m. UTC | #5
On Mon, Dec 09, 2024 at 07:45:51AM +0100, Greg Kroah-Hartman wrote:
> > diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
> > index 1d13cecc7cc7..1abd41269fd0 100644
> > --- a/scripts/Makefile.extrawarn
> > +++ b/scripts/Makefile.extrawarn
> > @@ -27,6 +27,7 @@ endif
> >  KBUILD_CPPFLAGS-$(CONFIG_WERROR) += -Werror
> >  KBUILD_CPPFLAGS += $(KBUILD_CPPFLAGS-y)
> >  KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds
> > +KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERREAD) += -Wno-stringop-overread
> 
> I don't want this disabled for all files in the kernel, we only have one
> that this is a problem for.  I think you disable this, the whole fortify
> logic is disabled which is not the goal, why not just force the fortify
> feature OFF if we have a "bad compiler" that can not support it?

I am glad we agree that disabling -Wstringop-overread for the kernel
entirely is the wrong way to approach this but I think that turning off
ALL of FORTIFY_SOURCE for these compiler versions (which are some of the
latest available) is also the wrong approach, especially in this current
situation where it seems like it is unreasonable to expect the compiler
not to warn about a potential overread here with the amount of
information available to it but maybe I am misunderstanding something
here:

https://lore.kernel.org/20241209193558.GA1597021@ax162/

If it is not possible to shut the compiler up by changing the source to
be more robust, I think we should strongly consider disabling it in
directly in cpumask_copy() using the __diag infrastructure. I think it
is underutilized as a solution to warnings like this.

> And it's odd that we are the only 2 people hitting it, has everyone else
> just given up on gcc and moved on to using clang?

Maybe people are not using CONFIG_WERROR=y and W=e when hitting this so
they do not notice? It also only became visible in 6.12 because of the
'inline' -> '__always_inline' changes in bitmap.h and cpumask.h, since
prior to that, the size of the objects being passed to memcpy() were not
known, so FORTIFY could not catch them (another +1 for that change).
From what I can tell, it also requires a CONFIG_NR_CPUS value of 256 or
greater, otherwise large_cpumask_bits is NR_CPUS, a compile time
constant.

Fedora clearly sees this though but it does not break their builds so I
am guessing they have not reported it upstream yet?

https://koji.fedoraproject.org/koji/taskinfo?taskID=126644404
https://kojipkgs.fedoraproject.org//work/tasks/4404/126644404/build.log

For the record, clang has no warning like this because with how clang is
currently architectured, the vast majority of warnings happen in the
front end, where there is usually not enough reliable information
available to make these kind of warnings be accurate, at least from my
understanding.

Cheers,
Nathan
Yury Norov Dec. 9, 2024, 8:43 p.m. UTC | #6
On Mon, Dec 09, 2024 at 01:03:00PM -0700, Nathan Chancellor wrote:
> Maybe people are not using CONFIG_WERROR=y and W=e when hitting this so
> they do not notice? It also only became visible in 6.12 because of the
> 'inline' -> '__always_inline' changes in bitmap.h and cpumask.h, since
> prior to that, the size of the objects being passed to memcpy() were not
> known, so FORTIFY could not catch them (another +1 for that change).

Thanks, but I'm actually not happy with that series (ab6b1010dab68f6d4).
The original motivation was that one part of compiler decided to outline
the pure wrappers or lightweight inline implementation for small bitmaps,
like those fitting inside a machine word. 

After that, another part of compiler started complaining that outlined
helpers mismatch the sections - .text and .init.data.

(Not mentioning that the helpers were not designed to be real outlined
functions, and doing that adds ~3k to kernel image.)

I don't like forcing compiler to do this or that, but in this case I
just don't know how to teach it to outline the function twice, if it
wants to do that. This should be done automatically, I guess...

Similarly, I don't know how to teach it to keep the functions inlined,
other than forcing it to do so. I really wonder what made it thinking
that this deserves to be a real function:

 static __always_inline
 bool cpumask_andnot(struct cpumask *dstp, const struct cpumask *src1p,
                     const struct cpumask *src2p)
 {
         return bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p),
                                           cpumask_bits(src2p), small_cpumask_bits);
 }

I guess, there are more 'functions' of that sort that outlined for nothing
in the kernel, and who knows how bloated is it?
Nathan Chancellor Dec. 9, 2024, 10:24 p.m. UTC | #7
On Mon, Dec 09, 2024 at 12:43:54PM -0800, Yury Norov wrote:
> On Mon, Dec 09, 2024 at 01:03:00PM -0700, Nathan Chancellor wrote:
> > Maybe people are not using CONFIG_WERROR=y and W=e when hitting this so
> > they do not notice? It also only became visible in 6.12 because of the
> > 'inline' -> '__always_inline' changes in bitmap.h and cpumask.h, since
> > prior to that, the size of the objects being passed to memcpy() were not
> > known, so FORTIFY could not catch them (another +1 for that change).
> 
> Thanks, but I'm actually not happy with that series (ab6b1010dab68f6d4).
> The original motivation was that one part of compiler decided to outline
> the pure wrappers or lightweight inline implementation for small bitmaps,
> like those fitting inside a machine word. 
> 
> After that, another part of compiler started complaining that outlined
> helpers mismatch the sections - .text and .init.data.

Not another part of the compiler but modpost, a kernel tool, started
complaining. If modpost could perform control flow analysis, it could
avoid false positives such as the one from ab6b1010dab68 by seeing more
of the callchain rather than just the outlined function being called
with a potentially discarded variable.

> (Not mentioning that the helpers were not designed to be real outlined
> functions, and doing that adds ~3k to kernel image.)

Isn't the point of '__always_inline' to convey this to the compiler? As
far as I understand it, the C standard permits the compiler is
completely free to ignore 'inline', which could happen for any number of
reasons, especially with code generation options such as the sanitizers
or other instrumentation. If you know that these functions need to be
inlined to generate better code but the compiler doesn't, why not tell
it?

> I don't like forcing compiler to do this or that, but in this case I
> just don't know how to teach it to outline the function twice, if it
> wants to do that. This should be done automatically, I guess...

I do not think that I understand what you are getting at or asking for
here, sorry. Are you saying you would expect the compiler to split
bitmap_and() into basically bitmap_and_small_const_nbits() and
__bitmap_and() then decide which to call in cpumask_and() based on the
condition of small_const_nbits(nbits) at a particular site? Isn't that
basically what we are allowing the compiler to figure out by always
inlining these functions into their call sites?

> Similarly, I don't know how to teach it to keep the functions inlined,
> other than forcing it to do so.

That's pretty much what '__always_inline' is, right? It's you as the
programmer saying "I know that this needs to be inlined for xyz reason
so I really need you to do it". Otherwise, you are just asking to tweak
a heuristic.

Cheers,
Nathan
Nilay Shroff Dec. 10, 2024, 8:28 a.m. UTC | #8
On 12/10/24 01:05, Nathan Chancellor wrote:
> On Sun, Dec 08, 2024 at 10:25:21AM -0800, Yury Norov wrote:
>> On Sun, Dec 08, 2024 at 09:42:28PM +0530, Nilay Shroff wrote:
>>> So the above statements expands to:
>>> memcpy(pinst->cpumask.pcpu->bits, pcpumask->bits, nr_cpu_ids)
>>> memcpy(pinst->cpumask.cbcpu->bits, cbcpumask->bits, nr_cpu_ids)
>>>
>>> Now the compiler complains about "error: ‘__builtin_memcpy’ reading
>>> between 257 and 536870904 bytes from a region of size 256". So the
>>> value of nr_cpu_ids which gcc calculated is between 257 and 536870904.
>>> This looks strange and incorrect.
>>
>> Thanks for the detour into internals. I did the same by myself, and
>> spent quite a lot of my time trying to understand why GCC believes
>> that here we're trying to access memory beyond idx == 256 and up to
>> a pretty random 536870904.
>>
>> 256 is most likely NR_CPUS/8, and that makes sense. But I have no ideas
>> what does this 536870904 mean. OK, it's ((u32)-64)>>3, but to me it's a
>> random number. I'm quite sure cpumasks machinery can't be involved in
>> generating it.
> 
> That can also be written as (UINT_MAX - 63) / 8, which I believe matches
> the ultimate math of bitmap_size() if nbits is UINT_MAX (but I did not
> fully verify) in bitmap_copy(). I tried building this code with the
> in-review -fdiagnostics-details option from GCC [1] but it does not
> really provide any other insight here. UINT_MAX probably comes from the
> fact that for this configuration, large_cpumask_bits is an indeterminate
> value for the compiler without link time optimization because it is an
> extern in kernel/padata.c:
> 
> | #if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS)
> | #define nr_cpu_ids ((unsigned int)NR_CPUS)
> | #else
> | extern unsigned int nr_cpu_ids;
> | #endif
> | ...
> | #if NR_CPUS <= BITS_PER_LONG
> |   #define small_cpumask_bits ((unsigned int)NR_CPUS)
> |   #define large_cpumask_bits ((unsigned int)NR_CPUS)
> | #elif NR_CPUS <= 4*BITS_PER_LONG
> |   #define small_cpumask_bits nr_cpu_ids
> |   #define large_cpumask_bits ((unsigned int)NR_CPUS)
> | #else
> |   #define small_cpumask_bits nr_cpu_ids
> |   #define large_cpumask_bits nr_cpu_ids
> | #endif
> 
> From what I can tell, nothing in this callchain asserts to the compiler
> that nr_cpu_ids cannot be larger than the compile time value of NR_CPUS
> (I assume there is a check for this somewhere?), so it assumes that this
> memcpy() can overflow if nr_cpu_ids is larger than NR_CPUS, which is
> where that range appears to come from. I am able to kill this warning
> with
> 
> diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
> index 9278a50d514f..a1b0e213c638 100644
> --- a/include/linux/cpumask.h
> +++ b/include/linux/cpumask.h
> @@ -836,6 +836,7 @@ void cpumask_shift_left(struct cpumask *dstp, const struct cpumask *srcp, int n)
>  static __always_inline
>  void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp)
>  {
> +	BUG_ON(large_cpumask_bits > NR_CPUS);
>  	bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), large_cpumask_bits);
>  }
>  
> 
> although I am sure that is not going to be acceptable but it might give
> a hint about what could be done to deal with this.
> 
> Another option would be taking advantage of the __diag infrastructure to
> silence this warning around the bitmap_copy() in cpumask_copy(), stating
> that we know this can never overflow because of <reason>. I think that
> would be much more palpable than disabling the warning globally for the
> kernel, much like Greg said.
> 
Okay so I think you (and Greg) were suggesting instead of disabling 
-Wstringop-overread globally or tuning it off for a particular source
file, lets disable it on gcc-13+ while we invoke bitmap_copy() as shown
below: 

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index d0ed9583743f..e61b9f3ff6a7 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -139,6 +139,18 @@
 #define __diag_GCC_8(s)
 #endif
 
+#if GCC_VERSION >= 130000
+#define __diag_GCC_13(s)       __diag(s)
+#else
+#define __diag_GCC_13(s)
+#endif
+
+#if GCC_VERSION >= 140000
+#define __diag_GCC_14(s)       __diag(s)
+#else
+#define __diag_GCC_14(s)
+#endif
+
 #define __diag_ignore_all(option, comment) \
        __diag(__diag_GCC_ignore option)
 
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 9278a50d514f..6885856e38b0 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -836,7 +836,23 @@ void cpumask_shift_left(struct cpumask *dstp, const struct cpumask *srcp, int n)
 static __always_inline
 void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp)
 {
+       /*
+        * Silence -Wstringop-overead warning generated while copying cpumask
+        * bits on gcc-13+ and CONFIG_FORTIFY_SOURCE=y. The gcc-13+ emits
+        * warning suggesting "we're trying to copy nbits which potentially
+        * exceeds NR_CPUS. Apparently, this seems false positive and might be
+        * a gcc bug as we know that large_cpumask_bits should never exceed
+        * NR_CPUS.
+        */
+       __diag_push();
+       __diag_ignore(GCC, 13, "-Wstringop-overread",
+               "Ignore string overflow warning while copying cpumask bits");
+       __diag_ignore(GCC, 14, "-Wstringop-overread",
+               "Ignore string overflow warning while copying cpumask bits");
+
        bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), large_cpumask_bits);
+
+       __diag_pop();
 }

Does the above change look good to everyone?

Thanks,
--Nilay
Nathan Chancellor Dec. 10, 2024, 4:14 p.m. UTC | #9
On Tue, Dec 10, 2024 at 01:58:00PM +0530, Nilay Shroff wrote:
> Okay so I think you (and Greg) were suggesting instead of disabling 
> -Wstringop-overread globally or tuning it off for a particular source
> file, lets disable it on gcc-13+ while we invoke bitmap_copy() as shown
> below: 

I cannot speak for Greg but yes, this is generally what I had in mind, I
have a few comments below.

> diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
> index d0ed9583743f..e61b9f3ff6a7 100644
> --- a/include/linux/compiler-gcc.h
> +++ b/include/linux/compiler-gcc.h
> @@ -139,6 +139,18 @@
>  #define __diag_GCC_8(s)
>  #endif
>  
> +#if GCC_VERSION >= 130000
> +#define __diag_GCC_13(s)       __diag(s)
> +#else
> +#define __diag_GCC_13(s)
> +#endif
> +
> +#if GCC_VERSION >= 140000
> +#define __diag_GCC_14(s)       __diag(s)
> +#else
> +#define __diag_GCC_14(s)
> +#endif

You do not need to add __diag_GCC_14 because __diag_GCC_13 covers
GCC 13 and newer.

>  #define __diag_ignore_all(option, comment) \
>         __diag(__diag_GCC_ignore option)
>  
> diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
> index 9278a50d514f..6885856e38b0 100644
> --- a/include/linux/cpumask.h
> +++ b/include/linux/cpumask.h
> @@ -836,7 +836,23 @@ void cpumask_shift_left(struct cpumask *dstp, const struct cpumask *srcp, int n)
>  static __always_inline
>  void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp)
>  {
> +       /*
> +        * Silence -Wstringop-overead warning generated while copying cpumask
> +        * bits on gcc-13+ and CONFIG_FORTIFY_SOURCE=y. The gcc-13+ emits
> +        * warning suggesting "we're trying to copy nbits which potentially
> +        * exceeds NR_CPUS. Apparently, this seems false positive and might be
> +        * a gcc bug as we know that large_cpumask_bits should never exceed
> +        * NR_CPUS.

I think the last sentence needs to be either dropped entirely or needs
to have more assertive language. While this might be a false positive, I
think it is entirely unreasonable to expect GCC to know that
large_cpumask_bits when it is nr_cpu_ids is bounded by NR_CPUS because
it does not have the definition of nr_cpu_ids visible at this point and
even if it did, it is still a global variable, so it has to assume that
value could be anything in lieu of an explicit bounds check.

Maybe something like this for the full comment?

/*
 * Silence instances of -Wstringop-overread that come from the memcpy() in
 * bitmap_copy() that may appear with GCC 13+, CONFIG_FORTIFY_SOURCE=y, and
 * and CONFIG_NR_CPUS > 256, as the length of the memcpy() in bitmap_copy() will
 * not a compile time constant. Without an explicit bounds check on the length
 * of the copy in this path, GCC will assume the length could be 0 to UINT_MAX,
 * which would trigger an overread of the source if it were to happen. As
 * nr_cpu_ids is known to be bounded by NR_CPUS, this copy will always be in
 * bounds.
 */

> +        */
> +       __diag_push();
> +       __diag_ignore(GCC, 13, "-Wstringop-overread",
> +               "Ignore string overflow warning while copying cpumask bits");
> +       __diag_ignore(GCC, 14, "-Wstringop-overread",
> +               "Ignore string overflow warning while copying cpumask bits");

This __diag_ignore() can be dropped as well.

> +
>         bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), large_cpumask_bits);
> +
> +       __diag_pop();
>  }
> 
> Does the above change look good to everyone?

I think this seems reasonable to me, but it might be good to get some
feedback from the hardening folks.

Cheers,
Nathan
Nilay Shroff Dec. 11, 2024, 9:16 a.m. UTC | #10
On 12/10/24 21:44, Nathan Chancellor wrote:
> On Tue, Dec 10, 2024 at 01:58:00PM +0530, Nilay Shroff wrote:
>> Okay so I think you (and Greg) were suggesting instead of disabling 
>> -Wstringop-overread globally or tuning it off for a particular source
>> file, lets disable it on gcc-13+ while we invoke bitmap_copy() as shown
>> below: 
> 
> I cannot speak for Greg but yes, this is generally what I had in mind, I
> have a few comments below.
> 
>> diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
>> index d0ed9583743f..e61b9f3ff6a7 100644
>> --- a/include/linux/compiler-gcc.h
>> +++ b/include/linux/compiler-gcc.h
>> @@ -139,6 +139,18 @@
>>  #define __diag_GCC_8(s)
>>  #endif
>>  
>> +#if GCC_VERSION >= 130000
>> +#define __diag_GCC_13(s)       __diag(s)
>> +#else
>> +#define __diag_GCC_13(s)
>> +#endif
>> +
>> +#if GCC_VERSION >= 140000
>> +#define __diag_GCC_14(s)       __diag(s)
>> +#else
>> +#define __diag_GCC_14(s)
>> +#endif
> 
> You do not need to add __diag_GCC_14 because __diag_GCC_13 covers
> GCC 13 and newer.
Yeah ok, I would remove __diag_GCC_14.

> 
>>  #define __diag_ignore_all(option, comment) \
>>         __diag(__diag_GCC_ignore option)
>>  
>> diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
>> index 9278a50d514f..6885856e38b0 100644
>> --- a/include/linux/cpumask.h
>> +++ b/include/linux/cpumask.h
>> @@ -836,7 +836,23 @@ void cpumask_shift_left(struct cpumask *dstp, const struct cpumask *srcp, int n)
>>  static __always_inline
>>  void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp)
>>  {
>> +       /*
>> +        * Silence -Wstringop-overead warning generated while copying cpumask
>> +        * bits on gcc-13+ and CONFIG_FORTIFY_SOURCE=y. The gcc-13+ emits
>> +        * warning suggesting "we're trying to copy nbits which potentially
>> +        * exceeds NR_CPUS. Apparently, this seems false positive and might be
>> +        * a gcc bug as we know that large_cpumask_bits should never exceed
>> +        * NR_CPUS.
> 
> I think the last sentence needs to be either dropped entirely or needs
> to have more assertive language. While this might be a false positive, I
> think it is entirely unreasonable to expect GCC to know that
> large_cpumask_bits when it is nr_cpu_ids is bounded by NR_CPUS because
> it does not have the definition of nr_cpu_ids visible at this point and
> even if it did, it is still a global variable, so it has to assume that
> value could be anything in lieu of an explicit bounds check.
> 
> Maybe something like this for the full comment?
> 
> /*
>  * Silence instances of -Wstringop-overread that come from the memcpy() in
>  * bitmap_copy() that may appear with GCC 13+, CONFIG_FORTIFY_SOURCE=y, and
>  * and CONFIG_NR_CPUS > 256, as the length of the memcpy() in bitmap_copy() will
>  * not a compile time constant. Without an explicit bounds check on the length
>  * of the copy in this path, GCC will assume the length could be 0 to UINT_MAX,
>  * which would trigger an overread of the source if it were to happen. As
>  * nr_cpu_ids is known to be bounded by NR_CPUS, this copy will always be in
>  * bounds.
>  */
Okay I would update comment.
> 
>> +        */
>> +       __diag_push();
>> +       __diag_ignore(GCC, 13, "-Wstringop-overread",
>> +               "Ignore string overflow warning while copying cpumask bits");
>> +       __diag_ignore(GCC, 14, "-Wstringop-overread",
>> +               "Ignore string overflow warning while copying cpumask bits");
> 
> This __diag_ignore() can be dropped as well.
Agreed.
> 
>> +
>>         bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), large_cpumask_bits);
>> +
>> +       __diag_pop();
>>  }
>>
>> Does the above change look good to everyone?
> 
> I think this seems reasonable to me, but it might be good to get some
> feedback from the hardening folks.
> 
> Cheers,
> Nathan

Thanks,
--Nilay
Kees Cook Dec. 12, 2024, 6:24 p.m. UTC | #11
On Sun, Dec 08, 2024 at 09:42:28PM +0530, Nilay Shroff wrote:
> While building the powerpc code using gcc 13.x, I came across following
> errors generated for kernel/padata.c file:
> 
>   CC      kernel/padata.o
> In file included from ./include/linux/string.h:390,
>                  from ./arch/powerpc/include/asm/paca.h:16,
>                  from ./arch/powerpc/include/asm/current.h:13,
>                  from ./include/linux/thread_info.h:23,
>                  from ./include/asm-generic/preempt.h:5,
>                  from ./arch/powerpc/include/generated/asm/preempt.h:1,
>                  from ./include/linux/preempt.h:79,
>                  from ./include/linux/spinlock.h:56,
>                  from ./include/linux/swait.h:7,
>                  from ./include/linux/completion.h:12,
>                  from kernel/padata.c:14:
> In function ‘bitmap_copy’,
>     inlined from ‘cpumask_copy’ at ./include/linux/cpumask.h:839:2,
>     inlined from ‘__padata_set_cpumasks’ at kernel/padata.c:730:2:
> ./include/linux/fortify-string.h:114:33: error: ‘__builtin_memcpy’ reading between 257 and 536870904 bytes from a region of size 256 [-Werror=stringop-overread]
>   114 | #define __underlying_memcpy     __builtin_memcpy
>       |                                 ^
> ./include/linux/fortify-string.h:633:9: note: in expansion of macro ‘__underlying_memcpy’
>   633 |         __underlying_##op(p, q, __fortify_size);                        \
>       |         ^~~~~~~~~~~~~
> ./include/linux/fortify-string.h:678:26: note: in expansion of macro ‘__fortify_memcpy_chk’
>   678 | #define memcpy(p, q, s)  __fortify_memcpy_chk(p, q, s,                  \
>       |                          ^~~~~~~~~~~~~~~~~~~~
> ./include/linux/bitmap.h:259:17: note: in expansion of macro ‘memcpy’
>   259 |                 memcpy(dst, src, len);
>       |                 ^~~~~~
> kernel/padata.c: In function ‘__padata_set_cpumasks’:
> kernel/padata.c:713:48: note: source object ‘pcpumask’ of size [0, 256]
>   713 |                                  cpumask_var_t pcpumask,
>       |                                  ~~~~~~~~~~~~~~^~~~~~~~

We've seen this also on x86 with 320 CPUs[1] (which perhaps was already
known since I see Thomas Weißschuh in CC already.

Building with -fdiagnostics-details we see:

In function 'bitmap_copy',
    inlined from 'cpumask_copy' at ../include/linux/cpumask.h:839:2,
    inlined from '__padata_set_cpumasks' at ../kernel/padata.c:723:2:
../include/linux/fortify-string.h:114:33: error: '__builtin_memcpy' reading between 41 and 536870904 bytes from a region of size 40 [-Werror=stringop-overread]
  114 | #define __underlying_memcpy     __builtin_memcpy
      |                                 ^
../include/linux/fortify-string.h:633:9: note: in expansion of macro '__underlying_memcpy'
  633 |         __underlying_##op(p, q, __fortify_size);                        \
      |         ^~~~~~~~~~~~~
../include/linux/fortify-string.h:678:26: note: in expansion of macro '__fortify_memcpy_chk'
  678 | #define memcpy(p, q, s)  __fortify_memcpy_chk(p, q, s,                  \
      |                          ^~~~~~~~~~~~~~~~~~~~
../include/linux/bitmap.h:259:17: note: in expansion of macro 'memcpy'
  259 |                 memcpy(dst, src, len);
      |                 ^~~~~~
  '__padata_set_cpumasks': events 1-2
../include/linux/fortify-string.h:613:36:
  612 |         if (p_size_field != SIZE_MAX &&
      |             ~~~~~~~~~~~~~~~~~~~~~~~~~~~
  613 |             p_size != p_size_field && p_size_field < size)
      |             ~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~
      |                                    |
      |                                    (1) when the condition is evaluated to false
      |                                    (2) when the condition is evaluated to true
  '__padata_set_cpumasks': event 3
  114 | #define __underlying_memcpy     __builtin_memcpy
      |                                 ^
      |                                 |
      |                                 (3) out of array bounds here

What's happening is that GCC is seeing that the run-time checks of
FORTIFY_SOURCE is checking for this condition (i.e. there is a path
through the code where the bounds could be too large and it still calls
memcpy) -- which is the point of this runtime check -- but that GCC has
found a compile-time path where this can be true.

Built without CONFIG_WERROR, we can examine the padata.o file for the
FORTIFY_SOURCE warning string to find the field:

$ strings gcc-diag/kernel/padata.o | grep ^field
field "dst" at include/linux/bitmap.h:259

which confirms it is this, which has already been seen in the thread:

static __always_inline
void bitmap_copy(unsigned long *dst, const unsigned long *src, unsigned
int nbits)
{
        unsigned int len = bitmap_size(nbits);

        if (small_const_nbits(nbits))
                *dst = *src;
        else
                memcpy(dst, src, len);
}

and I think what Nathan already discussed[2] is all true (i.e. that
nr_cpu_ids is unknown -- but bounded to a smaller range than [0..UINT_MAX]
due to the calculation in bitmap_size()). Nathan's fix silences the
warning. We could narrow the scope to only run-time-specified nr_cpus:


diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 9278a50d514f..8f1a694109e9 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -836,6 +836,8 @@ void cpumask_shift_left(struct cpumask *dstp, const struct cpumask *srcp, int n)
 static __always_inline
 void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp)
 {
+	if (!__builtin_constant_p(large_cpumask_bits))
+		BUG_ON(large_cpumask_bits > NR_CPUS);
 	bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), large_cpumask_bits);
 }
 

Or we could avoid the BUG_ON check and simply silence the warning
explicitly:


diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 9278a50d514f..0725b26f21b8 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -836,6 +836,8 @@ void cpumask_shift_left(struct cpumask *dstp, const struct cpumask *srcp, int n)
 static __always_inline
 void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp)
 {
+	if (!__builtin_constant_p(large_cpumask_bits))
+		OPTIMIZER_HIDE_VAR(large_cpumask_bits);
 	bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), large_cpumask_bits);
 }
 
Or we could unconditionally put the OPTIMIZER_HIDE_VAR() inside
bitmap_copy() itself:


diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 262b6596eca5..5503ccabe05a 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -251,12 +251,14 @@ static __always_inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
 static __always_inline
 void bitmap_copy(unsigned long *dst, const unsigned long *src, unsigned int nbits)
 {
-	unsigned int len = bitmap_size(nbits);
-
-	if (small_const_nbits(nbits))
+	if (small_const_nbits(nbits)) {
 		*dst = *src;
-	else
+	} else {
+		unsigned int len = bitmap_size(nbits);
+
+		OPTIMIZER_HIDE_VAR(len);
 		memcpy(dst, src, len);
+	}
 }
 
 /*

I prefer any of these to doing the build-system disabling of the
warning.

-Kees

[1] https://lore.kernel.org/all/202411021337.85E9BB06@keescook/
[2] https://lore.kernel.org/all/20241209193558.GA1597021@ax162/
Kees Cook Dec. 12, 2024, 6:47 p.m. UTC | #12
On Thu, Dec 12, 2024 at 10:24:36AM -0800, Kees Cook wrote:
> Or we could unconditionally put the OPTIMIZER_HIDE_VAR() inside
> bitmap_copy() itself:
> 
> 
> diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
> index 262b6596eca5..5503ccabe05a 100644
> --- a/include/linux/bitmap.h
> +++ b/include/linux/bitmap.h
> @@ -251,12 +251,14 @@ static __always_inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
>  static __always_inline
>  void bitmap_copy(unsigned long *dst, const unsigned long *src, unsigned int nbits)
>  {
> -	unsigned int len = bitmap_size(nbits);
> -
> -	if (small_const_nbits(nbits))
> +	if (small_const_nbits(nbits)) {
>  		*dst = *src;
> -	else
> +	} else {
> +		unsigned int len = bitmap_size(nbits);
> +
> +		OPTIMIZER_HIDE_VAR(len);
>  		memcpy(dst, src, len);
> +	}
>  }
>  
>  /*
> 
> I prefer any of these to doing the build-system disabling of the
> warning.

Actually, this should probably be done in the FORTIFY macro instead --
it's what actually tripping the GCC warning since it is the code that is
gleefully issuing a warning and then continuing with a overflowing copy
anyway...


diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index 0d99bf11d260..7203acfb9f17 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -630,7 +630,13 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size,
 		  __fortify_size,					\
 		  "field \"" #p "\" at " FILE_LINE,			\
 		  __p_size_field);					\
-	__underlying_##op(p, q, __fortify_size);			\
+	if (__builtin_constant_p(__fortify_size)) {			\
+		__underlying_##op(p, q, __fortify_size);		\
+	} else {							\
+		size_t ___fortify_size = __fortify_size;		\
+		OPTIMIZER_HIDE_VAR(___fortify_size);			\
+		__underlying_##op(p, q, ___fortify_size);		\
+	}								\
 })
 
 /*
Yury Norov Dec. 12, 2024, 7:34 p.m. UTC | #13
On Thu, Dec 12, 2024 at 10:47:40AM -0800, Kees Cook wrote:
> On Thu, Dec 12, 2024 at 10:24:36AM -0800, Kees Cook wrote:
> > Or we could unconditionally put the OPTIMIZER_HIDE_VAR() inside
> > bitmap_copy() itself:
> > 
> > 
> > diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
> > index 262b6596eca5..5503ccabe05a 100644
> > --- a/include/linux/bitmap.h
> > +++ b/include/linux/bitmap.h
> > @@ -251,12 +251,14 @@ static __always_inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
> >  static __always_inline
> >  void bitmap_copy(unsigned long *dst, const unsigned long *src, unsigned int nbits)
> >  {
> > -	unsigned int len = bitmap_size(nbits);
> > -
> > -	if (small_const_nbits(nbits))
> > +	if (small_const_nbits(nbits)) {
> >  		*dst = *src;
> > -	else
> > +	} else {
> > +		unsigned int len = bitmap_size(nbits);
> > +
> > +		OPTIMIZER_HIDE_VAR(len);
> >  		memcpy(dst, src, len);
> > +	}
> >  }
> >  
> >  /*
> > 
> > I prefer any of these to doing the build-system disabling of the
> > warning.
> 
> Actually, this should probably be done in the FORTIFY macro instead --
> it's what actually tripping the GCC warning since it is the code that is
> gleefully issuing a warning and then continuing with a overflowing copy
> anyway...
> 
> 
> diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
> index 0d99bf11d260..7203acfb9f17 100644
> --- a/include/linux/fortify-string.h
> +++ b/include/linux/fortify-string.h
> @@ -630,7 +630,13 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size,
>  		  __fortify_size,					\
>  		  "field \"" #p "\" at " FILE_LINE,			\
>  		  __p_size_field);					\
> -	__underlying_##op(p, q, __fortify_size);			\
> +	if (__builtin_constant_p(__fortify_size)) {			\
> +		__underlying_##op(p, q, __fortify_size);		\
> +	} else {							\
> +		size_t ___fortify_size = __fortify_size;		\
> +		OPTIMIZER_HIDE_VAR(___fortify_size);			\
> +		__underlying_##op(p, q, ___fortify_size);		\
> +	}								\
>  })

I like this more than anything else. Bitmap API is an innocent victim,
and trashing it with unrelated warning suppressors is just bad. If GCC
will get more aggressive, we'll end up with the kernel all trashed with
this OPTIMIZER_HIDE_VAR() stuff.

I tried to formulate it when discussed __always_inline story, but now I
think I can do it clear. Bitmaps is an example of very basic coding. It's
not very complicated, but it's the 2nd most popular kernel API after
spinlocks.

People literally spent hundreds hours on every single line of core APIs.
I think all that people expect that the most reviewed system code in
the world will be somewhat a reference for compilers and other tools.

If compiler can't inline a pure wrapper with no additional
functionality at all, or if modpost can't understand that inline
function declared in header can't be a 'section mismatch', or 
if CONFIG_FORTIFY complains about an overread that can never happen,
it's not a reason to pollute bitmaps, spinlocks, atomics or whatever
else, or even touch them. The tools should be fixed, not the code.

Acked-by: Yury Norov <yury.norov@gmail.com>
diff mbox series

Patch

diff --git a/init/Kconfig b/init/Kconfig
index a20e6efd3f0f..ff2f54520551 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -920,6 +920,14 @@  config CC_STRINGOP_OVERFLOW
 	bool
 	default y if CC_IS_GCC && !CC_NO_STRINGOP_OVERFLOW
 
+# Currently, disable -Wstringop-overread for gcc-13+ and FORTIFY_SOURCE globally.
+config GCC13_NO_STRINGOP_OVERREAD
+	def_bool y
+
+config CC_NO_STRINGOP_OVERREAD
+	bool
+	default y if CC_IS_GCC && GCC_VERSION >= 130000 && GCC13_NO_STRINGOP_OVERREAD && FORTIFY_SOURCE
+
 #
 # For architectures that know their GCC __int128 support is sound
 #
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index 1d13cecc7cc7..1abd41269fd0 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -27,6 +27,7 @@  endif
 KBUILD_CPPFLAGS-$(CONFIG_WERROR) += -Werror
 KBUILD_CPPFLAGS += $(KBUILD_CPPFLAGS-y)
 KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds
+KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERREAD) += -Wno-stringop-overread
 
 ifdef CONFIG_CC_IS_CLANG
 # The kernel builds with '-std=gnu11' so use of GNU extensions is acceptable.