On 24/07/2019 10:13, Juergen Gross wrote:
> The fix is a one-liner. :-)
> 
> diff --git a/xen/common/schedule.c b/xen/common/schedule.c
> index f0bc5b3161..da9efb147f 100644
> --- a/xen/common/schedule.c
> +++ b/xen/common/schedule.c
> @@ -2207,6 +2207,7 @@ static struct sched_unit *sched_wait_rendezvous_in(struct sched_unit *prev,
>          if ( unlikely(!scheduler_active) )
>          {
>              ASSERT(is_idle_unit(prev));
> +            atomic_set(&prev->next_task->rendezvous_out_cnt, 0);
>              prev->rendezvous_in_cnt = 0;
>          }
>      }

Even with that applied, I'm still seeing it :(

(XEN) [  311.223780] Watchdog timer detects that CPU1 is stuck!

(XEN) [  311.229413] ----[ Xen-4.13.0  x86_64  debug=y   Not tainted ]----

(XEN) [  311.236002] CPU:    1

(XEN) [  311.238774] RIP:    e008:[<ffff82d0802408a8>] sched_context_switched+0x92/0x101

(XEN) [  311.246575] RFLAGS: 0000000000000202   CONTEXT: hypervisor

(XEN) [  311.252556] rax: 0000000000000002   rbx: ffff83081cc635b0   rcx: 
0000000000000001

(XEN) [  311.260530] rdx: ffff83081cc63634   rsi: ffff83081cc8f000   rdi: 
ffff83081cc8f000

(XEN) [  311.268502] rbp: ffff83081cc87df0   rsp: ffff83081cc87dd0   r8:  
0000000000000000

(XEN) [  311.276474] r9:  ffff83081cc62000   r10: ffff83081cc62000   r11: 
ffff83081cc6b000

(XEN) [  311.284448] r12: ffff83081cc8f000   r13: ffff83081cc8f000   r14: 
ffff83081cc61e80

(XEN) [  311.292422] r15: ffff82d0805e2260   cr0: 000000008005003b   cr4: 
00000000001526e0

(XEN) [  311.300395] cr3: 00000000dd4ac000   cr2: 0000559b05a94048

(XEN) [  311.306288] fsb: 0000000000000000   gsb: ffff8880a3940000   gss: 
0000000000000000

(XEN) [  311.314262] ds: 002b   es: 002b   fs: 0000   gs: 0000   ss: e010   cs: 
e008

(XEN) [  311.321716] Xen code around <ffff82d0802408a8> (sched_context_switched+0x92/0x101):

(XEN) [  311.329862]  85 c0 74 08 f3 90 8b 02 <85> c0 75 f8 49 8b 44 24 10 66 81 38 ff 7f 75 05

(XEN) [  311.338269] Xen stack trace from rsp=ffff83081cc87dd0:

(XEN) [  311.343904]    ffff83081cc8f000 ffff83081cc8f000 0000000000000000 
ffff83081cc635b0

(XEN) [  311.351963]    ffff83081cc87e28 ffff82d080240996 ffff83081cc61e98 
ffff82d08060a4a8

(XEN) [  311.360022]    ffff83081cc61e98 ffff82d08060a4a8 ffff83081cc635b0 
ffff83081cc87e80

(XEN) [  311.368083]    ffff82d080240f7a 0000000000000001 ffff83081cc8f000 
00000047588837ec

(XEN) [  311.376142]    000000011cc87ec0 ffff82d0805c3a00 ffff82d0805c3980 
ffffffffffffffff

(XEN) [  311.384205]    ffff82d0805d3980 ffff82d0805e2260 ffff83081cc87eb0 
ffff82d08024274a

(XEN) [  311.392263]    0000000000000001 ffff82d0805c3a00 0000000000000001 
0000000000000001

(XEN) [  311.400324]    ffff83081cc87ec0 ffff82d0802427bf ffff83081cc87ef0 
ffff82d080279a1d

(XEN) [  311.408385]    ffff83081cc8f000 ffff83081cc8f000 0000000000000001 
ffff83081cc635b0

(XEN) [  311.416443]    ffff83081cc87df0 ffff88809ee1ba00 ffff88809ee1ba00 
0000000000000000

(XEN) [  311.424504]    0000000000000000 0000000000000005 ffff88809ee1ba00 
0000000000000246

(XEN) [  311.432563]    aaaaaaaaaaaaaaaa 0000000000000000 000000000001ca00 
0000000000000000

(XEN) [  311.440625]    ffffffff810013aa ffffffff8203c190 deadbeefdeadf00d 
deadbeefdeadf00d

(XEN) [  311.448685]    0000010000000000 ffffffff810013aa 000000000000e033 
0000000000000246

(XEN) [  311.456747]    ffffc900400bfeb0 000000000000e02b 000000000000beef 
000000000000beef

(XEN) [  311.464807]    000000000000beef 000000000000beef 0000e01000000001 
ffff83081cc8f000

(XEN) [  311.472864]    000000379c665d00 00000000001526e0 0000000000000000 
0000000000000000

(XEN) [  311.480926]    0000060000000000 0000000000000000

(XEN) [  311.486041] Xen call trace:

(XEN) [  311.489332]    [<ffff82d0802408a8>] sched_context_switched+0x92/0x101

(XEN) [  311.496266]    [<ffff82d080240996>] schedule.c#sched_context_switch+0x7f/0x160

(XEN) [  311.503980]    [<ffff82d080240f7a>] schedule.c#sched_slave+0x28f/0x2b5

(XEN) [  311.510999]    [<ffff82d08024274a>] softirq.c#__do_softirq+0x61/0x8c

(XEN) [  311.517846]    [<ffff82d0802427bf>] do_softirq+0x13/0x15

(XEN) [  311.523653]    [<ffff82d080279a1d>] domain.c#idle_loop+0x52/0xa7

(XEN) [  311.530152]

(XEN) [  311.532144] CPU0 @ e008:ffff82d08024334d (stop_machine.c#stopmachine_wait_state+0x19/0x24)

(XEN) [  311.540899] CPU5 @ e008:ffff82d080243398 
(stop_machine.c#stopmachine_action+0x40/0x93)

(XEN) [  311.549307] CPU3 @ e008:ffff82d08024339e 
(stop_machine.c#stopmachine_action+0x46/0x93)

(XEN) [  311.557712] CPU4 @ e008:ffff82d08024339e 
(stop_machine.c#stopmachine_action+0x46/0x93)

(XEN) [  311.566119] CPU7 @ e008:ffff82d08024339e 
(stop_machine.c#stopmachine_action+0x46/0x93)

(XEN) [  311.574526] CPU2 @ e008:ffff82d080243398 
(stop_machine.c#stopmachine_action+0x40/0x93)

(XEN) [  311.582931] CPU6 @ e008:ffff82d080243398 
(stop_machine.c#stopmachine_action+0x40/0x93)

(XEN) [  311.591919]

(XEN) [  311.593914] ****************************************

(XEN) [  311.599374] Panic on CPU 1:

(XEN) [  311.602669] FATAL TRAP: vector = 2 (nmi)

(XEN) [  311.607088] [error_code=0000]

(XEN) [  311.610641] ****************************************

(XEN) [  311.616101]

(XEN) [  311.618095] Reboot in five seconds...

(XEN) [  311.622254] Executing kexec image on cpu1

(XEN) [  311.627534] Shot down all CPUs

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Reply via email to