This is nasty.

https://gitlab.com/xen-project/people/andyhhp/xen/-/jobs/9137008215

When preprocessed, we get:

diff --git a/xen/common/sched/credit2.c b/xen/common/sched/credit2.c
index 0a83f237259f..6b8d3660240a 100644
--- a/xen/common/sched/credit2.c
+++ b/xen/common/sched/credit2.c
@@ -958,7 +958,28 @@ cpu_add_to_runqueue(const struct scheduler *ops, unsigned int cpu)
     write_lock_irqsave(&prv->lock, flags);
 
     rqd_ins = &prv->rql;
+
+#if 0
     list_for_each_entry ( rqd, &prv->rql, rql )
+#else
+    for ( (rqd) = ({
+                typeof(((typeof(*(rqd)) *)((void*)0))->rql) *__mptr =
+                    ((&prv->rql)->next);
+                (typeof(*(rqd)) *)
+                    ((char *)__mptr -
+                     __builtin_offsetof(typeof(*(rqd)),rql) );
+            });
+          &(rqd)->rql != // <-- problem expression
+              (&prv->rql);
+          (rqd) = ({
+                  typeof(((typeof(*(rqd)) *)((void*)0))->rql) *__mptr =
+                      ((rqd)->rql.next);
+                  (typeof(*(rqd)) *)
+                      ((char *)__mptr -
+                       __builtin_offsetof(typeof(*(rqd)),rql) );
+              })
+        )
+#endif
     {
         /* Remember first unused queue index. */
         if ( !rqi_unused && rqd->id > rqi )


The alignment of struct csched2_runqueue_data is 8, while that of
struct csched2_private is 4.

prv's list_head for rql is at offset +28 (+0x1c), and
list_for_each_entry() performs a buggily-typed container_of() on the
list head itself, treating a csched2_private as if it were a
csched2_runqueue_data.

It functions because the result is only used for an address equality
check, but the bogus, insufficiently-aligned pointer is why UBSAN
objects.

This seems to fix the issue:

diff --git a/xen/common/sched/credit2.c b/xen/common/sched/credit2.c
index 6b8d3660240a..ab938942d75f 100644
--- a/xen/common/sched/credit2.c
+++ b/xen/common/sched/credit2.c
@@ -537,7 +537,8 @@ struct csched2_private {
     unsigned int ratelimit_us;         /* Rate limiting for this scheduler   */
 
     unsigned int active_queues;        /* Number of active runqueues         */
-    struct list_head rql;              /* List of runqueues                  */
+    struct list_head rql               /* List of runqueues                  */
+    __aligned(alignof(struct csched2_runqueue_data));
 
     cpumask_t initialized;             /* CPUs part of this scheduler        */
     struct list_head sdom;             /* List of domains (for debug key)    */

but it's obviously not a viable fix.  I can't help feeling that the bug
is really in the list macros.

~Andrew

Reply via email to