diff mbox series

ARM32 UBSAN failure in credit2

Message ID 9c2c6099-9399-4607-9533-4d2f6aa1afc8@citrix.com (mailing list archive)
State New
Headers show
Series ARM32 UBSAN failure in credit2 | expand

Commit Message

Andrew Cooper Feb. 14, 2025, 11:36 p.m. UTC
This is nasty.

https://gitlab.com/xen-project/people/andyhhp/xen/-/jobs/9137008215

When preprocessed, we get:

+    __aligned(alignof(struct csched2_runqueue_data));
 
     cpumask_t initialized;             /* CPUs part of this
scheduler        */
     struct list_head sdom;             /* List of domains (for debug
key)    */

but it's obviously not a viable fix.  I can't help feeling that the bug
is really in the list macros.

~Andrew

Comments

Juergen Gross Feb. 15, 2025, 11:14 a.m. UTC | #1
On 15.02.25 00:36, Andrew Cooper wrote:
> This is nasty.
> 
> https://gitlab.com/xen-project/people/andyhhp/xen/-/jobs/9137008215
> 
> When preprocessed, we get:
> 
> diff --git a/xen/common/sched/credit2.c b/xen/common/sched/credit2.c
> index 0a83f237259f..6b8d3660240a 100644
> --- a/xen/common/sched/credit2.c
> +++ b/xen/common/sched/credit2.c
> @@ -958,7 +958,28 @@ cpu_add_to_runqueue(const struct scheduler *ops,
> unsigned int cpu)
>       write_lock_irqsave(&prv->lock, flags);
>   
>       rqd_ins = &prv->rql;
> +
> +#if 0
>       list_for_each_entry ( rqd, &prv->rql, rql )
> +#else
> +    for ( (rqd) = ({
> +                typeof(((typeof(*(rqd)) *)((void*)0))->rql) *__mptr =
> +                    ((&prv->rql)->next);
> +                (typeof(*(rqd)) *)
> +                    ((char *)__mptr -
> +                     __builtin_offsetof(typeof(*(rqd)),rql) );
> +            });
> +          &(rqd)->rql != // <-- problem expression
> +              (&prv->rql);
> +          (rqd) = ({
> +                  typeof(((typeof(*(rqd)) *)((void*)0))->rql) *__mptr =
> +                      ((rqd)->rql.next);
> +                  (typeof(*(rqd)) *)
> +                      ((char *)__mptr -
> +                       __builtin_offsetof(typeof(*(rqd)),rql) );
> +              })
> +        )
> +#endif
>       {
>           /* Remember first unused queue index. */
>           if ( !rqi_unused && rqd->id > rqi )
> 
> 
> The alignment of csched2_runqueue_data is 8, while csched2_private is 4.
> 
> priv's list_head for rql is at +28 (+0x1c), and list_for_each_entry()
> performs a buggily-typed container_of(), treating a csched2_private as
> if it were csched2_runqueue_data.

No, I don't think so. It just compares the addresses of 2 struct list_head.
1 in csched2_runqueue_data and 1 in csched2_private.

> It functions because it's only an address equality check, but it's also
> why UBSAN objects.

struct list_head should require only 4 byte alignment, so I don't see why
this would trigger UBSAN. Could it be that UBSAN has a bug here?


Juergen
Juergen Gross Feb. 15, 2025, 1:46 p.m. UTC | #2
On 15.02.25 12:14, Jürgen Groß wrote:
> On 15.02.25 00:36, Andrew Cooper wrote:
>> This is nasty.
>>
>> https://gitlab.com/xen-project/people/andyhhp/xen/-/jobs/9137008215
>>
>> When preprocessed, we get:
>>
>> diff --git a/xen/common/sched/credit2.c b/xen/common/sched/credit2.c
>> index 0a83f237259f..6b8d3660240a 100644
>> --- a/xen/common/sched/credit2.c
>> +++ b/xen/common/sched/credit2.c
>> @@ -958,7 +958,28 @@ cpu_add_to_runqueue(const struct scheduler *ops,
>> unsigned int cpu)
>>       write_lock_irqsave(&prv->lock, flags);
>>       rqd_ins = &prv->rql;
>> +
>> +#if 0
>>       list_for_each_entry ( rqd, &prv->rql, rql )
>> +#else
>> +    for ( (rqd) = ({
>> +                typeof(((typeof(*(rqd)) *)((void*)0))->rql) *__mptr =
>> +                    ((&prv->rql)->next);
>> +                (typeof(*(rqd)) *)
>> +                    ((char *)__mptr -
>> +                     __builtin_offsetof(typeof(*(rqd)),rql) );
>> +            });
>> +          &(rqd)->rql != // <-- problem expression
>> +              (&prv->rql);
>> +          (rqd) = ({
>> +                  typeof(((typeof(*(rqd)) *)((void*)0))->rql) *__mptr =
>> +                      ((rqd)->rql.next);
>> +                  (typeof(*(rqd)) *)
>> +                      ((char *)__mptr -
>> +                       __builtin_offsetof(typeof(*(rqd)),rql) );
>> +              })
>> +        )
>> +#endif
>>       {
>>           /* Remember first unused queue index. */
>>           if ( !rqi_unused && rqd->id > rqi )
>>
>>
>> The alignment of csched2_runqueue_data is 8, while csched2_private is 4.
>>
>> priv's list_head for rql is at +28 (+0x1c), and list_for_each_entry()
>> performs a buggily-typed container_of(), treating a csched2_private as
>> if it were csched2_runqueue_data.
> 
> No, I don't think so. It just compares the addresses of 2 struct list_head.
> 1 in csched2_runqueue_data and 1 in csched2_private.
> 
>> It functions because it's only an address equality check, but it's also
>> why UBSAN objects.
> 
> struct list_head should require only 4 byte alignment, so I don't see why
> this would trigger UBSAN. Could it be that UBSAN has a bug here?

Ah, meanwhile I've got it.

I think we want something like:

#define list_for_each_entry(pos, head, member)                          \
     for ((pos) = list_empty(head) ? NULL : list_entry((head)->next,     \
                                                typeof(*(pos)), member); \
          pos;                                                           \
          (pos) = list_is_last((pos)->member) ? NULL :                   \
                  list_entry((pos)->member.next, typeof(*(pos)), member))


Juergen
diff mbox series

Patch

diff --git a/xen/common/sched/credit2.c b/xen/common/sched/credit2.c
index 0a83f237259f..6b8d3660240a 100644
--- a/xen/common/sched/credit2.c
+++ b/xen/common/sched/credit2.c
@@ -958,7 +958,28 @@  cpu_add_to_runqueue(const struct scheduler *ops,
unsigned int cpu)
     write_lock_irqsave(&prv->lock, flags);
 
     rqd_ins = &prv->rql;
+
+#if 0
     list_for_each_entry ( rqd, &prv->rql, rql )
+#else
+    for ( (rqd) = ({
+                typeof(((typeof(*(rqd)) *)((void*)0))->rql) *__mptr =
+                    ((&prv->rql)->next);
+                (typeof(*(rqd)) *)
+                    ((char *)__mptr -
+                     __builtin_offsetof(typeof(*(rqd)),rql) );
+            });
+          &(rqd)->rql != // <-- problem expression
+              (&prv->rql);
+          (rqd) = ({
+                  typeof(((typeof(*(rqd)) *)((void*)0))->rql) *__mptr =
+                      ((rqd)->rql.next);
+                  (typeof(*(rqd)) *)
+                      ((char *)__mptr -
+                       __builtin_offsetof(typeof(*(rqd)),rql) );
+              })
+        )
+#endif
     {
         /* Remember first unused queue index. */
         if ( !rqi_unused && rqd->id > rqi )


The alignment of csched2_runqueue_data is 8, while csched2_private is 4.

priv's list_head for rql is at +28 (+0x1c), and list_for_each_entry()
performs a buggily-typed container_of(), treating a csched2_private as
if it were csched2_runqueue_data.

It functions because it's only an address equality check, but it's also
why UBSAN objects.

This seems to fix the issue:

diff --git a/xen/common/sched/credit2.c b/xen/common/sched/credit2.c
index 6b8d3660240a..ab938942d75f 100644
--- a/xen/common/sched/credit2.c
+++ b/xen/common/sched/credit2.c
@@ -537,7 +537,8 @@  struct csched2_private {
     unsigned int ratelimit_us;         /* Rate limiting for this
scheduler   */
 
     unsigned int active_queues;        /* Number of active
runqueues         */
-    struct list_head rql;              /* List of
runqueues                  */
+    struct list_head rql               /* List of
runqueues                  */