Stats-read needs to lock stats but same lock is taken in stats update in irq context. Therefore it needs to disable irq to avoid following deadlock :-
BUG: soft lockup - CPU#1 stuck for 23s! [ovs-vswitchd:1425] CPU 1 Pid: 1425, comm: ovs-vswitchd Tainted: G O 3.2.39-server-nn23 #1 VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform RIP: 0010:[<ffffffff8103db22>] [<ffffffff8103db22>] __ticket_spin_lock+0x22/0x30 RSP: 0018:ffff88003fd03b30 EFLAGS: 00000297 RAX: 0000000000000001 RBX: 0000000000000000 RCX: 0000000000000050 RDX: 0000000000000002 RSI: ffff88003d0a9900 RDI: ffff88003ae19598 RBP: ffff88003fd03b30 R08: 0000000000000000 R09: ffff88003ad44048 R10: 0000000000000001 R11: 0000000000000001 R12: ffff88003fd03aa8 R13: ffffffff8164e5de R14: ffff88003fd03b30 R15: ffff88003ae19580 FS: 00007ffb0b428940(0000) GS:ffff88003fd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3c0ef94000 CR3: 00000000250e2000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process ovs-vswitchd (pid: 1425, threadinfo ffff88002514a000, task ffff8800250aae00) Stack: ffff88003fd03b40 ffffffff8164596e ffff88003fd03b70 ffffffffa027622d ffff88003d0a9900 ffffe8ffffd03800 ffff8800297f5a80 ffff88003fd03ba8 ffff88003fd03c60 ffffffffa02759af ffff88003fd03de0 ffff88003fd03e4c Call Trace: <IRQ> [<ffffffff8164596e>] _raw_spin_lock+0xe/0x20 [<ffffffffa027622d>] ovs_flow_stats_update+0x5d/0x100 [openvswitch] [<ffffffffa02759af>] ovs_dp_process_received_packet+0x8f/0x130 [openvswitch] [<ffffffffa027c0ca>] ovs_vport_receive+0x2a/0x30 [openvswitch] [<ffffffffa027db18>] netdev_frame_hook+0xb8/0x120 [openvswitch] [<ffffffffa027da60>] ? free_port_rcu+0x30/0x30 [openvswitch] [<ffffffff81539318>] __netif_receive_skb+0x1c8/0x620 [<ffffffff8153a4c0>] netif_receive_skb+0x80/0x90 [<ffffffff8115f14c>] ? ksize+0x1c/0xc0 [<ffffffff8153a610>] napi_skb_finish+0x50/0x70 [<ffffffff8153ac15>] napi_gro_receive+0xf5/0x140 [<ffffffffa00368ae>] vmxnet3_rq_rx_complete+0x51e/0x7c0 [vmxnet3] [<ffffffff8101ac90>] ? nommu_map_sg+0xe0/0xe0 [<ffffffffa0036da5>] vmxnet3_poll_rx_only+0x45/0xc0 [vmxnet3] [<ffffffff8153ae64>] net_rx_action+0x134/0x290 [<ffffffff8103db0d>] ? __ticket_spin_lock+0xd/0x30 [<ffffffff8106e1a8>] __do_softirq+0xa8/0x210 [<ffffffff8164596e>] ? _raw_spin_lock+0xe/0x20 [<ffffffff8164fd6c>] call_softirq+0x1c/0x30 [<ffffffff81016215>] do_softirq+0x65/0xa0 [<ffffffff8106e58e>] irq_exit+0x8e/0xb0 [<ffffffff81650633>] do_IRQ+0x63/0xe0 [<ffffffff81645e2e>] common_interrupt+0x6e/0x6e ----------- Bug #21853 Reported-by: Pawan Shukla <shuk...@vmware.com> Signed-off-by: Pravin B Shelar <pshe...@nicira.com> --- v2: - push down stats locking. --- AUTHORS | 1 + datapath/flow.c | 53 +++++++++++++++++++++++++++++++---------------------- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/AUTHORS b/AUTHORS index 1c2d9ea..ac756cf 100644 --- a/AUTHORS +++ b/AUTHORS @@ -205,6 +205,7 @@ Niklas Andersson nanders...@nicira.com Padmanabhan Krishnan kpr...@yahoo.com Pankaj Thakkar thak...@nicira.com Paulo Cravero pcrav...@as2594.net +Pawan Shukla shuk...@vmware.com Peter Balland pe...@nicira.com Peter Phaal peter.ph...@inmon.com Prabina Pattnaik prabina.pattn...@nechclst.in diff --git a/datapath/flow.c b/datapath/flow.c index 9b3d3a7..8be3801 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -87,17 +87,25 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) spin_unlock(&stats->lock); } -static void stats_read(struct flow_stats *stats, +static void stats_read(struct flow_stats *stats, bool lock_bh, struct ovs_flow_stats *ovs_stats, unsigned long *used, __be16 *tcp_flags) { - spin_lock(&stats->lock); + if (lock_bh) + spin_lock_bh(&stats->lock); + else + spin_lock(&stats->lock); + if (time_after(stats->used, *used)) *used = stats->used; *tcp_flags |= stats->tcp_flags; ovs_stats->n_packets += stats->packet_count; ovs_stats->n_bytes += stats->byte_count; - spin_unlock(&stats->lock); + + if (lock_bh) + spin_unlock_bh(&stats->lock); + else + spin_unlock(&stats->lock); } void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, @@ -110,33 +118,38 @@ void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, memset(ovs_stats, 0, sizeof(*ovs_stats)); if (!flow->stats.is_percpu) { - stats_read(flow->stats.stat, ovs_stats, used, tcp_flags); + stats_read(flow->stats.stat, true, ovs_stats, used, tcp_flags); } else { cur_cpu = get_cpu(); + for_each_possible_cpu(cpu) { struct flow_stats *stats; - - if (cpu == cur_cpu) - local_bh_disable(); + bool lock_bh; stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); - stats_read(stats, ovs_stats, used, tcp_flags); - - if (cpu == cur_cpu) - local_bh_enable(); + lock_bh = (cpu == cur_cpu); + stats_read(stats, lock_bh, ovs_stats, used, tcp_flags); } put_cpu(); } } -static void stats_reset(struct flow_stats *stats) +static void stats_reset(struct flow_stats *stats, bool lock_bh) { - spin_lock(&stats->lock); + if (lock_bh) + spin_lock_bh(&stats->lock); + else + spin_lock(&stats->lock); + stats->used = 0; stats->packet_count = 0; stats->byte_count = 0; stats->tcp_flags = 0; - spin_unlock(&stats->lock); + + if (lock_bh) + spin_unlock_bh(&stats->lock); + else + spin_unlock(&stats->lock); } void ovs_flow_stats_clear(struct sw_flow *flow) @@ -144,19 +157,15 @@ void ovs_flow_stats_clear(struct sw_flow *flow) int cpu, cur_cpu; if (!flow->stats.is_percpu) { - stats_reset(flow->stats.stat); + stats_reset(flow->stats.stat, true); } else { cur_cpu = get_cpu(); for_each_possible_cpu(cpu) { + bool lock_bh; - if (cpu == cur_cpu) - local_bh_disable(); - - stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu)); - - if (cpu == cur_cpu) - local_bh_enable(); + lock_bh = (cpu == cur_cpu); + stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu), lock_bh); } put_cpu(); } -- 1.7.1 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev