It is recommended that you upgrade to version 1.10 or above, since the fix for this bug has been backported only to branch-1.10.
If you do not want, you may try this fix of mine, below. It is for branch-1.7 --- ofproto/ofproto-dpif.c | 44 ++++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 53fe172..30bb28b 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -528,8 +528,8 @@ struct vlan_splinter { int vid; }; -static uint32_t vsp_realdev_to_vlandev(const struct ofproto_dpif *, - uint32_t realdev, ovs_be16 vlan_tci); +static uint16_t vsp_realdev_to_vlandev(const struct ofproto_dpif *, + uint16_t realdev, ovs_be16 vlan_tci); static uint16_t vsp_vlandev_to_realdev(const struct ofproto_dpif *, uint16_t vlandev, int *vid); static bool vsp_adjust_flow(const struct ofproto_dpif *, struct flow *); @@ -4665,13 +4665,17 @@ send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet) uint16_t odp_port; struct flow flow; int error; + uint16_t vlandev_port; flow_extract((struct ofpbuf *) packet, 0, 0, 0, &flow); - odp_port = vsp_realdev_to_vlandev(ofproto, ofport->odp_port, - flow.vlan_tci); - if (odp_port != ofport->odp_port) { + vlandev_port = vsp_realdev_to_vlandev(ofproto, ofport->up.ofp_port, + flow.vlan_tci); + if (vlandev_port != ofport->up.ofp_port) { + odp_port = ofp_port_to_odp_port(vlandev_port); eth_pop_vlan(packet); flow.vlan_tci = htons(0); + } else { + odp_port = ofport->odp_port; } ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); @@ -4856,6 +4860,7 @@ compose_output_action__(struct action_xlate_ctx *ctx, uint16_t ofp_port, ovs_be16 flow_vlan_tci = ctx->flow.vlan_tci; uint8_t flow_nw_tos = ctx->flow.nw_tos; uint16_t out_port; + uint16_t vlandev_port; if (ofport) { struct priority_to_dscp *pdscp; @@ -4876,11 +4881,15 @@ compose_output_action__(struct action_xlate_ctx *ctx, uint16_t ofp_port, * later and we're pre-populating the flow table. 
*/ } - out_port = vsp_realdev_to_vlandev(ctx->ofproto, odp_port, - ctx->flow.vlan_tci); - if (out_port != odp_port) { + vlandev_port = vsp_realdev_to_vlandev(ctx->ofproto, ofp_port, + ctx->flow.vlan_tci); + if (vlandev_port != ofp_port) { + out_port = ofp_port_to_odp_port(vlandev_port); ctx->flow.vlan_tci = htons(0); + } else { + out_port = odp_port; } + commit_odp_actions(&ctx->flow, &ctx->base_flow, ctx->odp_actions); nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_OUTPUT, out_port); @@ -6955,18 +6964,17 @@ hash_realdev_vid(uint16_t realdev_ofp_port, int vid) } /* Returns the ODP port number of the Linux VLAN device that corresponds to - * 'vlan_tci' on the network device with port number 'realdev_odp_port' in - * 'ofproto'. For example, given 'realdev_odp_port' of eth0 and 'vlan_tci' 9, - * it would return the port number of eth0.9. + * 'vlan_tci' on the network device with port number 'realdev_ofp_port' in + * 'struct ofport_dpif'. For example, given 'realdev_ofp_port' of eth0 and + * 'vlan_tci' 9, it would return the port number of eth0.9. * - * Unless VLAN splinters are enabled for port 'realdev_odp_port', this - * function just returns its 'realdev_odp_port' argument. */ -static uint32_t + * Unless VLAN splinters are enabled for port 'realdev_ofp_port', this + * function just returns its 'realdev_ofp_port' argument. 
*/ +static uint16_t vsp_realdev_to_vlandev(const struct ofproto_dpif *ofproto, - uint32_t realdev_odp_port, ovs_be16 vlan_tci) + uint16_t realdev_ofp_port, ovs_be16 vlan_tci) { if (!hmap_is_empty(&ofproto->realdev_vid_map)) { - uint16_t realdev_ofp_port = odp_port_to_ofp_port(realdev_odp_port); int vid = vlan_tci_to_vid(vlan_tci); const struct vlan_splinter *vsp; @@ -6975,11 +6983,11 @@ vsp_realdev_to_vlandev(const struct ofproto_dpif *ofproto, &ofproto->realdev_vid_map) { if (vsp->realdev_ofp_port == realdev_ofp_port && vsp->vid == vid) { - return ofp_port_to_odp_port(vsp->vlandev_ofp_port); + return vsp->vlandev_ofp_port; } } } - return realdev_odp_port; + return realdev_ofp_port; } static struct vlan_splinter * -- 1.7.9.5 On Thu, Jun 6, 2013 at 11:27 PM, Kris zhang <zhang.k...@gmail.com> wrote: > Thanks Alex, but how to take this bug fix? Do I have to upgrade to the > latest version of openvswitch? Or just replace the file ofproto-dpif.c in > version 1.7.1 and recompile? > > BTW today i found the lost packets issue again. My previous conclusion > maybe not correct. Let me explain the whole process: > > centos6.0, ovs1.7.1, bonding3.5.0, kernel2.6.32-71.29.1.el6.x86_64, > igb2.4.13 > > 1) Adding a bond0, only has one slave eth0 > 1a) ifconfig bond0 up > 2) ovs-vsctl add-br br0 > 2a) ovs-vsctl set bridge br0 stp_enable=true, > other_config:stp-forward-delay=1 > 2b) ifconfig br0 <host_ip> up > 3) ovs-vsctl add-port bond0 > 3a) ifconfig bond0 up > 4) ovs-vsctl add-br br3000 br0 3000 > 5) tunctl -t taptest > 5a) ifconfig taptest up > 6) ovs-vsctl add-port br3000 taptest > (Above is run by the system when the host booting) > > Ping this host ip, and it will lost 4 packets when the port taptest adding > to br0. > The strang thing is: If i create another tap "tap2", and run command > manually: ovs-vsctl add-port br3000 tap2, it won't lost any packets. > I compared the two tap, it's almost same. 
the clue is: > In interface table: > ----------------------------------------> taptest > statistics : {stp_error_count=0, stp_rx_count=59, stp_tx_count=886} > status : {stp_port_id="8001", stp_role=designated, > stp_sec_in_state="1394", stp_state=forwarding} > > ----------------------------------------> tap2 > statistics : {stp_error_count=0, stp_rx_count=0, stp_tx_count=55} > status : {stp_port_id="8003", stp_role=designated, > stp_sec_in_state="66", stp_state=forwarding} > > Acutally i dont' know what meaning of above data. But my feeling tell me > this is reason. > So i stop the STP on br0, and it works. I hope above method can help other > people to solve similar problem. > > > Thanks, > Kris > > > > > > > > > On Wed, Jun 5, 2013 at 12:15 AM, Alex Wang <al...@nicira.com> wrote: > >> Hey Kris, >> >> There was a bug in the "add_vsp()" function in "ofproto/ofproto-dpif.c". >> And the fix is in this patch >> http://git.openvswitch.org/cgi-bin/gitweb.cgi?p=openvswitch;a=commit;h=deea120099d23fac3f687ec302351e38a21ee353 >> . >> >> I think this may be the reason for your problem. >> >> Kind Regards, >> Alex Wang >> >> >> On Tue, Jun 4, 2013 at 8:07 AM, Kris zhang <zhang.k...@gmail.com> wrote: >> >>> Thanks Jesse, I solved this issue through upgrade the NIC driver igb >>> from 2.1 to 2.4, now it is ok even if use Linux bond as the external port >>> of ovs bridge. And i will try to use OVS bonding once i have a chance. Also >>> i hope FAQ can list which NIC driver has vlan problem, and should be >>> upgrade to which version. >>> >>> Thanks, >>> Kris >>> >>> >>> On Thu, May 30, 2013 at 7:22 AM, Jesse Gross <je...@nicira.com> wrote: >>> >>>> Linux bonds don't pass vlan information through to the driver. If you >>>> use OVS bonding then you shouldn't have this problem. >>>> >>>> On Wed, May 29, 2013 at 11:08 AM, Kris zhang <zhang.k...@gmail.com> >>>> wrote: >>>> > I found the reason, the problem is not caused by vlan splinters, it >>>> caused >>>> > by bond0. 
I don't know why, but if remove the bond0, and ovs-br0 >>>> directly >>>> > connect to eth0, the problem disappears. >>>> > >>>> > >>>> > >>>> > >>>> > On Mon, May 27, 2013 at 10:10 AM, Kris zhang <zhang.k...@gmail.com> >>>> wrote: >>>> >> >>>> >> Hi Jesse, >>>> >> >>>> >> Because if i don't use the vlan splinters, the VMs' network traffic >>>> will >>>> >> be nearly 0 M/s (but they can ping each other). I read FAQ, it says >>>> the >>>> >> reason maybe the NIC driver or Linux kernel version problems, and it >>>> also >>>> >> says the vlan splinters may solve this issue, but they don't said >>>> the ping >>>> >> will miss some packets when add a new vlan. is it a bug for vlan >>>> splinters? >>>> >> Please see the images in the attachment. >>>> >> >>>> >> Thanks, >>>> >> Kris >>>> >> >>>> >> >>>> >> On Thu, May 23, 2013 at 11:25 PM, Jesse Gross <je...@nicira.com> >>>> wrote: >>>> >>> >>>> >>> It seems that the original issue was that you were using VLANs when >>>> >>> you shouldn't have. In that case, why are you trying to use VLAN >>>> >>> splinters? >>>> >>> >>>> >>> On Thu, May 23, 2013 at 2:44 AM, Kris zhang <zhang.k...@gmail.com> >>>> wrote: >>>> >>> > Thanks Jesse, but i still cannot fix my issue. 
Because if i use >>>> vlan >>>> >>> > splinters, the above issue will be happened, if i don't use vlan >>>> >>> > splinters, >>>> >>> > the packets between the VMs are very slow (ping is ok), i use >>>> ovs-dpctl >>>> >>> > dump-flows br0, get the following result: >>>> >>> > >>>> >>> > >>>> >>> > >>>> in_port(1),eth(src=c6:b0:ea:37:29:47,dst=c6:b0:3a:c0:0d:55),eth_type(0x8100),vlan(vid=3000,pcp=0),encap(eth_type(0x0800),ipv4(src=20.1.120.12,dst=20.1.120.13,proto=1,tos=1,ttl=64,frag=no),icmp(type=8,code=0)), >>>> >>> > packets:101, bytes:10302, used:4.341s, actions=pop_vlan,3 >>>> >>> > >>>> >>> > >>>> in_port(3),eth(src=c6:b0:3a:c0:0d:55,dst=c6:b0:ea:37:29:47),eth_type(0x0800),ipv4(src=20.1.120.13,dst=20.1.120.12,proto=1,tos=0,ttl=64,frag=no),icmp(type=0,code=0)), >>>> >>> > packets:101, bytes:9898, used:4.341s, >>>> >>> > actions=push_vlan(vid=3000,pcp=0),1 >>>> >>> > >>>> >>> > The port 1 is eth1, and the port 3 is the tap of VM. >>>> >>> > Does the iptables affect the ovs? If not, i have to upgrade the >>>> linux >>>> >>> > kernel, or upgrade NIC driver. >>>> >>> > >>>> >>> > Thanks, >>>> >>> > Kris >>>> >>> > >>>> >>> > >>>> >>> > >>>> >>> > >>>> >>> > On Tue, May 21, 2013 at 3:32 AM, Jesse Gross <je...@nicira.com> >>>> wrote: >>>> >>> >> >>>> >>> >> There's an extensive section in the FAQ about vlans that I would >>>> >>> >> recommend reading. >>>> >>> >> >>>> >>> >> On Mon, May 20, 2013 at 8:51 AM, Kris zhang < >>>> zhang.k...@gmail.com> >>>> >>> >> wrote: >>>> >>> >> > No, so you mean it maybe not caused by vlan splinters? 
>>>> >>> >> > >>>> >>> >> > >>>> >>> >> > On Mon, May 20, 2013 at 11:18 PM, Jesse Gross < >>>> je...@nicira.com> >>>> >>> >> > wrote: >>>> >>> >> >> >>>> >>> >> >> On Mon, May 20, 2013 at 4:37 AM, Kris zhang < >>>> zhang.k...@gmail.com> >>>> >>> >> >> wrote: >>>> >>> >> >> > Hi guys, >>>> >>> >> >> > >>>> >>> >> >> > I use ovs-1.7.1, and i run ovs on a single NIC host >>>> (CentOS): >>>> >>> >> >> > >>>> >>> >> >> > # ovs-vsctl add-br br0 >>>> >>> >> >> > # ovs-vsctl add-port br0 eth0 >>>> >>> >> >> > >>>> >>> >> >> > Then i setup the eth0 interface's other_config: >>>> >>> >> >> > enable-vlan-splinters="true". >>>> >>> >> >> > >>>> >>> >> >> > Last I ping this host by another PC, and at same time i do >>>> this: >>>> >>> >> >> > >>>> >>> >> >> > # ovs-vsctl add-port taptest br0 tag=100 >>>> >>> >> >> > >>>> >>> >> >> > I found there are 4 "Request timed out." happened. That >>>> means the >>>> >>> >> >> > host >>>> >>> >> >> > lost >>>> >>> >> >> > connect about 4 seconds. >>>> >>> >> >> > >>>> >>> >> >> > If i remove the vlan splinters on interface eth0. it won't >>>> >>> >> >> > happen. >>>> >>> >> >> > So does anybody know the reason? >>>> >>> >> >> >>>> >>> >> >> Is the other machine actually on that VLAN? >>>> >>> >> > >>>> >>> >> > >>>> >>> > >>>> >>> > >>>> >> >>>> >> >>>> > >>>> >>> >>> >>> _______________________________________________ >>> discuss mailing list >>> discuss@openvswitch.org >>> http://openvswitch.org/mailman/listinfo/discuss >>> >>> >> >
_______________________________________________ discuss mailing list discuss@openvswitch.org http://openvswitch.org/mailman/listinfo/discuss