Hi Team, My environment has a large number of flows, exceeding the flow limit, and ovs-vswitchd.log contains messages like this:
"ofproto_dpif_upcall(handler27149)|WARN|upcall: datapath flow limit reached" then i encountered a coredump in ovs version 2.13, the kernel stack is below: watchdog: BUG: soft lockup - CPU#42 stuck for 67s! [ovs-vswitchd:113801] [34495627.812027] CPU: 42 PID: 113801 Comm: ovs-vswitchd Kdump: loaded Tainted: G W OE K------------ T 3.10.0-957.1.1.el7.x86_64 #1 [34495627.812032] task: ffff9e21980ce180 ti: ffff9e01c1028000 task.ti: ffff9e01c1028000 [34495627.812034] RIP: 0010:[<ffffffffac36a0b5>] [<ffffffffac36a0b5>] _raw_spin_unlock_irqrestore+0x15/0x20 [34495627.812044] RSP: 0018:ffff9e513fa83918 EFLAGS: 00000282 [34495627.812046] RAX: ffff9e2134a65648 RBX: ffff9e513fa838d0 RCX: 0000000000000000 [34495627.812048] RDX: ffff9e3134e56b10 RSI: 0000000000000282 RDI: 0000000000000282 [34495627.812049] RBP: ffff9e513fa83918 R08: ffff9e513fa83820 R09: ffff9e213fa5b780 [34495627.812051] R10: 0000000000000000 R11: 000000000000254c R12: ffff9e513fa83888 [34495627.812052] R13: ffffffffac375df2 R14: ffff9e513fa83918 R15: ffff9e2134a65640 [34495627.812055] FS: 00007f54407e3700(0000) GS:ffff9e513fa80000(0000) knlGS:0000000000000000 [34495627.812057] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [34495627.812058] CR2: 00007f54407cf000 CR3: 000000100fc8a000 CR4: 00000000003407e0 [34495627.812060] Call Trace: [34495627.812063] <IRQ> [34495627.812069] [<ffffffffabcced3f>] __wake_up_sync_key+0x4f/0x60 [34495627.812075] [<ffffffffac21d43a>] sock_def_readable+0x3a/0x70 [34495627.812079] [<ffffffffac2722af>] __netlink_sendskb+0x5f/0x180 [34495627.812085] [<ffffffffabef7d8c>] ? security_sock_rcv_skb+0x1c/0x20 [34495627.812088] [<ffffffffac27551b>] netlink_unicast+0x1db/0x210 [34495627.812091] [<ffffffffac2267cb>] ? 
skb_zerocopy+0x1fb/0x380 [34495627.812103] [<ffffffffc0d93153>] queue_userspace_packet+0x2f3/0x5c0 [openvswitch] [34495627.812111] [<ffffffffc0d95b15>] ovs_dp_upcall+0x65/0x70 [openvswitch] [34495627.812120] [<ffffffffc0d95c49>] ovs_dp_process_packet+0x129/0x150 [openvswitch] [34495627.812129] [<ffffffffc0da124b>] ovs_vport_receive+0x7b/0xe0 [openvswitch] [34495627.812133] [<ffffffffabd88e0c>] ? irq_work_run+0x2c/0x40 [34495627.812136] [<ffffffffabca12da>] ? irq_exit+0xda/0x110 [34495627.812141] [<ffffffffabc57159>] ? smp_call_function_interrupt+0x39/0x40 [34495627.812144] [<ffffffffac377382>] ? call_function_interrupt+0x162/0x170 [34495627.812148] [<ffffffffabce38a5>] ? arch_scale_smt_power+0x5/0x20 [34495627.812156] [<ffffffffc0da1e67>] netdev_port_receive+0xa7/0x100 [openvswitch] [34495627.812166] [<ffffffffc0da1ede>] netdev_frame_hook+0x1e/0x30 [openvswitch] [34495627.812170] [<ffffffffac238fca>] __netif_receive_skb_core+0x1fa/0xa10 [34495627.812174] [<ffffffffabd0256f>] ? __getnstimeofday64+0x3f/0xd0 [34495627.812177] [<ffffffffac2397f8>] __netif_receive_skb+0x18/0x60 [34495627.812179] [<ffffffffac239880>] netif_receive_skb_internal+0x40/0xc0 [34495627.812182] [<ffffffffac23a508>] napi_gro_receive+0xd8/0x100 [34495627.812199] [<ffffffffc062b5ae>] i40e_clean_rx_irq+0x3ce/0xbd0 [i40e] [34495627.812210] [<ffffffffc062c0ee>] i40e_napi_poll+0x33e/0x7a0 [i40e] [34495627.812214] [<ffffffffac239e9f>] net_rx_action+0x26f/0x390 [34495627.812217] [<ffffffffabca0f85>] __do_softirq+0xf5/0x280 [34495627.812220] [<ffffffffac37832c>] call_softirq+0x1c/0x30 [34495627.812221] <EOI> [34495627.812225] [<ffffffffabc2e675>] do_softirq+0x65/0xa0 [34495627.812228] [<ffffffffabca03db>] __local_bh_enable_ip+0x9b/0xb0 [34495627.812231] [<ffffffffac36a25e>] _raw_spin_unlock_bh+0x1e/0x20 [34495627.812238] [<ffffffffc0d97387>] ovs_flow_stats_get+0x87/0x100 [openvswitch] [34495627.812245] [<ffffffffc0d9232e>] ovs_flow_cmd_fill_info+0xee/0x2b0 [openvswitch] [34495627.812252] 
[<ffffffffc0d94860>] ovs_flow_cmd_dump+0x120/0x180 [openvswitch] [34495627.812255] [<ffffffffac272ca4>] netlink_dump+0xd4/0x2b0 [34495627.812257] [<ffffffffac2730e5>] netlink_recvmsg+0x265/0x490 [34495627.812260] [<ffffffffac2194f5>] sock_recvmsg+0xc5/0x100 [34495627.812264] [<ffffffffabce04c9>] ? update_cfs_shares+0xa9/0xf0 [34495627.812267] [<ffffffffabcdeb6b>] ? should_numa_migrate_memory+0x5b/0x150 [34495627.812270] [<ffffffffac21a673>] ___sys_recvmsg+0x133/0x2d0 [34495627.812274] [<ffffffffabdebced>] ? handle_mm_fault+0x39d/0x9b0 [34495627.812277] [<ffffffffac21bb91>] __sys_recvmsg+0x51/0x90 [34495627.812280] [<ffffffffac21bbe2>] SyS_recvmsg+0x12/0x20 [34495627.812283] [<ffffffffac374ddb>] system_call_fastpath+0x22/0x27 [34495627.812284] Code: 07 00 66 66 66 90 5d c3 0f 1f 40 00 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 55 48 89 e5 c6 07 00 66 66 66 90 48 89 f7 57 9d <66> 66 90 66 90 5d c3 0f 1f 40 00 66 66 66 66 90 55 48 89 e5 48 [34495627.812313] Kernel panic - not syncing: softlockup: hung tasks [34495627.813676] CPU: 42 PID: 113801 Comm: ovs-vswitchd Kdump: loaded Tainted: G W OELK------------ T 3.10.0-957.1.1.el7.x86_64 #1 [34495627.817205] Call Trace: [34495627.818603] <IRQ> [<ffffffffac361d89>] dump_stack+0x19/0x1b [34495627.819887] [<ffffffffac35b498>] panic+0xe8/0x21f [34495627.821062] [<ffffffffabc2e8b8>] ? show_regs+0x58/0x210 [34495627.822248] [<ffffffffabd48e71>] watchdog_timer_fn+0x231/0x240 [34495627.823438] [<ffffffffabd48c40>] ? watchdog+0x40/0x40 [34495627.824611] [<ffffffffabcc6013>] __hrtimer_run_queues+0xf3/0x270 [34495627.825747] [<ffffffffabcc659f>] hrtimer_interrupt+0xaf/0x1d0 [34495627.826888] [<ffffffffabc5a3bb>] local_apic_timer_interrupt+0x3b/0x60 [34495627.828049] [<ffffffffac3796c3>] smp_apic_timer_interrupt+0x43/0x60 [34495627.829194] [<ffffffffac375df2>] apic_timer_interrupt+0x162/0x170 [34495627.830345] [<ffffffffac36a0b5>] ? 
_raw_spin_unlock_irqrestore+0x15/0x20 [34495627.831511] [<ffffffffabcced3f>] __wake_up_sync_key+0x4f/0x60 [34495627.832677] [<ffffffffac21d43a>] sock_def_readable+0x3a/0x70 [34495627.833841] [<ffffffffac2722af>] __netlink_sendskb+0x5f/0x180 [34495627.835327] [<ffffffffabef7d8c>] ? security_sock_rcv_skb+0x1c/0x20 [34495627.836415] [<ffffffffac27551b>] netlink_unicast+0x1db/0x210 [34495627.837473] [<ffffffffac2267cb>] ? skb_zerocopy+0x1fb/0x380 [34495627.838524] [<ffffffffc0d93153>] queue_userspace_pa [34495627.839576] [<ffffffffc0d95b15>] ovs_dp_upcall+0x65/0x70 [openvswitch] [34495627.840639] [<ffffffffc0d95c49>] ovs_dp_process_packet+0x129/0x150 [openvswitch] [34495627.841706] [<ffffffffc0da124b>] ovs_vport_receive+0x7b/0xe0 [openvswitch] [34495627.842761] [<ffffffffabd88e0c>] ? irq_work_run+0x2c/0x40 [34495627.843824] [<ffffffffabca12da>] ? irq_exit+0xda/0x110 [34495627.844869] [<ffffffffabc57159>] ? smp_call_function_interrupt+0x39/0x40 [34495627.845910] [<ffffffffac377382>] ? call_function_interrupt+0x162/0x170 [34495627.846940] [<ffffffffabce38a5>] ? arch_scale_smt_power+0x5/0x20 [34495627.847966] [<ffffffffc0da1e67>] netdev_port_receive+0xa7/0x100 [openvswitch] [34495627.849213] [<ffffffffc0da1ede>] netdev_frame_hook+0x1e/0x30 [openvswitch] [34495627.850328] [<ffffffffac238fca>] __netif_receive_skb_core+0x1fa/0xa10 [34495627.851311] [<ffffffffabd0256f>] ? 
__getnstimeofday64+0x3f/0xd0 [34495627.852274] [<ffffffffac2397f8>] __netif_receive_skb+0x18/0x60 [34495627.853223] [<ffffffffac239880>] netif_receive_skb_internal+0x40/0xc0 [34495627.854149] [<ffffffffac23a508>] napi_gro_receive+0xd8/0x100 [34495627.855071] [<ffffffffc062b5ae>] i40e_clean_rx_irq+0x3ce/0xbd0 [i40e] [34495627.855986] [<ffffffffc062c0ee>] i40e_napi_poll+0x33e/0x7a0 [i40e] [34495627.856868] [<ffffffffac239e9f>] net_rx_action+0x26f/0x390 [34495627.857737] [<ffffffffabca0f85>] __do_softirq+0xf5/0x280 [34495627.858615] [<ffffffffac37832c>] call_softirq+0x1c/0x30 [34495627.859497] <EOI> [<ffffffffabc2e675>] do_softirq+0x65/0xa0 [34495627.860369] [<ffffffffabca03db>] __local_bh_enable_ip+0x9b/0xb0 [34495627.861225] [<ffffffffac36a25e>] _raw_spin_unlock_bh+0x1e/0x20 [34495627.862059] [<ffffffffc0d97387>] ovs_flow_stats_get+0x87/0x100 [openvswitch] [34495627.862886] [<ffffffffc0d9232e>] ovs_flow_cmd_fill_info+0xee/0x2b0 [openvswitch] [34495627.863712] [<ffffffffc0d94860>] ovs_flow_cmd_dump+0x120/0x180 [openvswitch] [34495627.864575] [<ffffffffac272ca4>] netlink_dump+0xd4/0x2b0 [34495627.865602] [<ffffffffac2730e5>] netlink_recvmsg+0x265/0x490 [34495627.866353] [<ffffffffac2194f5>] sock_recvmsg+0xc5/0x100 [34495627.867094] [<ffffffffabce04c9>] ? update_cfs_shares+0xa9/0xf0 [34495627.867834] [<ffffffffabcdeb6b>] ? should_numa_migrate_memory+0x5b/0x150 [34495627.868575] [<ffffffffac21a673>] ___sys_recvmsg+0x133/0x2d0 [34495627.869316] [<ffffffffabdebced>] ? handle_mm_fault+0x39d/0x9b0 [34495627.870058] [<ffffffffac21bb91>] __sys_recvmsg+0x51/0x90 [34495627.870801] [<ffffffffac21bbe2>] SyS_recvmsg+0x12/0x20 [34495627.871538] [<ffffffffac374ddb>] system_call_fastpath+0x22/0x27 I searched the OVS community mailing list and found one related change: https://patchwork.ozlabs.org/project/netdev/patch/1395929134-4487-1-git-send-email-...@redhat.com/#741770 As far as I can see, this commit only modifies the functions ovs_flow_stats_clear and ovs_flow_stats_get, but not ovs_flow_stats_update. 
Then I compared the code of the OVS 2.13 kernel module (kmod) with the latest code in the kernel and found that the two are the same. So my current guess is that the soft lockup shown above is caused by ovs_flow_stats_update not having been modified, but I'm not sure. I hope I can get an answer. Thank you. Yours sincerely, wangchuanlei _______________________________________________ discuss mailing list disc...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-discuss