Hi Guru,

The rule that responds to ARP requests for a floating IP is installed
only on the hypervisor hosting the VM mapped to that floating IP. This
is done through a hack right now: when ovn-northd installs this rule,
it appends a key-value pair "lport: <lport of the VM>" to the
external_ids column of the logical flow. When ovn-controller processes
the logical flows and sees an "lport" key in a flow, it installs the
rule only if that lport is resident on the local chassis.
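
A minimal sketch of that controller-side check (condensed from the
lflow.c hunk in the quoted patch below; the helper name is mine, and
smap/sset come from the OVS lib):

    #include "lib/smap.h"
    #include "lib/sset.h"
    #include <stdbool.h>

    /* Returns true if this logical flow should be installed on this
     * chassis: either it carries no "lport" key in its external_ids, or
     * the named lport is resident locally. */
    static bool
    lflow_should_install(const struct smap *external_ids,
                         const struct sset *local_ports)
    {
        const char *lport = smap_get(external_ids, "lport");
        return !lport || sset_contains(local_ports, lport);
    }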


Here is the packet walkthrough for all the scenarios. This implementation
does not use the conntrack NAT support. Since a floating IP is a 1:1 IP
mapping, we rely on simply rewriting the DIP and the SIP.


Topology:

+----------+ +----------+                       +----------+ +----------+
| VM1      | | VM2      |                       | VM3      | | VM4      |
| 10.1.1.3 | | 10.1.1.4 |                       | 10.1.2.5 | | 10.1.2.6 |
| MAC:m_vm1| | MAC:m_vm2|                       | MAC:m_vm3| | MAC:m_vm4|
+----------+ +----------+                       +----------+ +----------+
   |           |                                         |           |
   |           |                                         |           |
   |           |                                         |           |
  PVM1       PVM2               *********                PVM3       PVM4
+----------------+            **         **            +----------------+
|                |          **   *      *  **          |                |
|    Net1        |         *       *  *      *         |     Net2       |
|  10.1.1.0/24   |P1----RP1*      ROUTER     *RP2----P2| 10.1.2.0/24    |
| GW: 10.1.1.1   |         *       *  *      *         | GW: 10.1.2.1   |
| GW MAC: m_net1 |          **   *      *  **          | GW MAC: m_net2 |
+----------------+            **         **            +----------------+
                                *********
                                   RP3
                                    |
                                    |
                                    |
 +-----------------+                P3                 +-----------------+
 | FIP1:198.44.1.3 |       +-------------------+       | FIP3:198.44.1.5 |
 | MAC: m_fip1     |-------|    Provider Net   |-------| MAC: m_fip3     |
 | VM: VM1         |       |  198.44.1.0/24    |       | VM: VM3         |
 +-----------------+       |  GW: 198.44.1.254 |       +-----------------+
 | FIP2:198.44.1.4 |-------|RouterIP:198.44.1.2|-------| FIP4:198.44.1.6 |
 | MAC: m_fip2     |       |RouterMac: m_pn1   |       | MAC: m_fip4     |
 | VM: VM2         |       +-------------------+       | VM: VM4         |
 +-----------------+             localnet              +-----------------+
                                    |
                                    |
                                    |
   +-------------+           +-----------------+           +------------+
   |  EXT HOST   |           | Physical Network|           | Gateway    |
   | 198.44.1.253|-----------|  198.44.1.0/24  |-----------|198.44.1.254|
   |  MAC: m_eh  |           |                 |           | MAC: m_gw  |
   +-------------+           +-----------------+           +------------+



Router Ports:
-------------
    RP1 --> P1 (Net1) IP: 10.1.1.1, MAC: m_net1
    RP2 --> P2 (Net2) IP: 10.1.2.1, MAC: m_net2
    RP3 --> P3 (Provider Net) IP: 198.44.1.2, MAC: m_pn1

Floating ip Mappings:
--------------------
    FIP1 mapped to VM1 (198.44.1.3 - 10.1.1.3)
    FIP2 mapped to VM2 (198.44.1.4 - 10.1.1.4)
    FIP3 mapped to VM3 (198.44.1.5 - 10.1.2.5)
    FIP4 mapped to VM4 (198.44.1.6 - 10.1.2.6)
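
 (NB representation, per the patch description quoted below: each FIP is
 a logical switch port of type "floating-ip" on the provider network
 whose options carry the association, i.e. "fixed-ip-port=<lport of the
 VM's vif>, router-port=<lport of the logical router port>". For FIP1,
 for example, fixed-ip-port would name VM1's lport.)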

Rules in logical router:
------------------------
 Table=lr_in_admission, P=50, match=(dst==m_fip1 && inport == RP3), action=
    next;
 Table=lr_in_admission, P=50, match=(dst==m_fip2 && inport == RP3), action=
    next;
 Table=lr_in_admission, P=50, match=(dst==m_fip3 && inport == RP3), action=
    next;
 Table=lr_in_admission, P=50, match=(dst==m_fip4 && inport == RP3), action=
    next;

 Table=lr_in_ip_input, P=0, action=next;

 Table=lr_in_ip_dnat, P=100, match=(SIP == 10.1.1.0/24 && DIP ==
    10.1.2.0/24), action=(reg2 = 0x1; next;)
 Table=lr_in_ip_dnat, P=100, match=(SIP == 10.1.2.0/24 && DIP ==
    10.1.1.0/24), action=(reg2 = 0x1; next;)
 Table=lr_in_ip_dnat, P=90, match=(DIP == 198.44.1.3), action=(
    DIP=10.1.1.3; inport = " "; next;)
 Table=lr_in_ip_dnat, P=90, match=(DIP == 198.44.1.4), action=(
    DIP=10.1.1.4; inport = " "; next;)
 Table=lr_in_ip_dnat, P=90, match=(DIP == 198.44.1.5), action=(
    DIP=10.1.2.5; inport = " "; next;)
 Table=lr_in_ip_dnat, P=90, match=(DIP == 198.44.1.6), action=(
    DIP=10.1.2.6; inport = " "; next;)
 Table=lr_in_ip_dnat, P=0, action=next;

 Table=lr_in_ip_routing, P=24, match=(DIP == 10.1.1.0/24), action=(ip.ttl--;
    SMAC=m_net1; outport=RP1; next;)
 Table=lr_in_ip_routing, P=24, match=(DIP == 10.1.2.0/24), action=(ip.ttl--;
    SMAC=m_net2; outport=RP2; next;)
 Table=lr_in_ip_routing, P=24, match=(DIP == 198.44.1.0/24), action=(ip.ttl--;
    SMAC=m_pn1; outport=RP3; next;)


 Table=lr_in_ip_snat, P=100, match=(reg2==0x1), actions=(next;)
 Table=lr_in_ip_snat, P=90, match=(SIP == 10.1.1.3 && outport == RP3),
    action=(SIP=198.44.1.3; SMAC=m_fip1; next;)
 Table=lr_in_ip_snat, P=90, match=(SIP == 10.1.1.4 && outport == RP3),
    action=(SIP=198.44.1.4; SMAC=m_fip2; next;)
 Table=lr_in_ip_snat, P=90, match=(SIP == 10.1.2.5 && outport == RP3),
    action=(SIP=198.44.1.5; SMAC=m_fip3; next;)
 Table=lr_in_ip_snat, P=90, match=(SIP == 10.1.2.6 && outport == RP3),
    action=(SIP=198.44.1.6; SMAC=m_fip4; next;)
 Table=lr_in_ip_snat, P=50, match=(SIP == 10.1.1.3), action=(
    SIP=198.44.1.3;next;)
 Table=lr_in_ip_snat, P=50, match=(SIP == 10.1.1.4), action=(
    SIP=198.44.1.4;next;)
 Table=lr_in_ip_snat, P=50, match=(SIP == 10.1.2.5), action=(
    SIP=198.44.1.5;next;)
 Table=lr_in_ip_snat, P=50, match=(SIP == 10.1.2.6), action=(
    SIP=198.44.1.6;next;)
 Table=lr_in_ip_snat, P=0, action=next;
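
 For reference, a condensed sketch of how ovn-northd emits the per-FIP
 priority-90 DNAT/SNAT pair above (simplified from the ovn-northd.c hunk
 in the quoted patch; fip, fip_mac, fixed_ip and lrp are assumed to be in
 scope, as in that per-address loop):

     /* DNAT: floating ip -> VM ip; also clear inport so the packet may
      * later egress the same port it arrived on. */
     char *match = xasprintf("ip4.dst == "IP_FMT, IP_ARGS(fip));
     char *actions = xasprintf("ip4.dst = "IP_FMT"; inport = \"\"; next;",
                               IP_ARGS(fixed_ip));
     ovn_lflow_add(lflows, lrp->peer->od, S_ROUTER_IN_IP_DNAT, 90,
                   match, actions);
     free(match);
     free(actions);

     /* SNAT: VM ip -> floating ip (and FIP mac) when leaving via the
      * gateway port. */
     match = xasprintf("(ip4.src == "IP_FMT") && outport == %s",
                       IP_ARGS(fixed_ip), lrp->peer->json_key);
     actions = xasprintf("eth.src = "ETH_ADDR_FMT"; ip4.src = "IP_FMT"; next;",
                         ETH_ADDR_ARGS(fip_mac), IP_ARGS(fip));
     ovn_lflow_add(lflows, lrp->peer->od, S_ROUTER_IN_IP_SNAT, 90,
                   match, actions);
     free(match);
     free(actions);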


_____________________________________________________
Scenario 1: North <--> South (VM1 <--> External Host)
_____________________________________________________

 [IN PVM1]: m_vm1, 10.1.1.3 -> m_net1, 198.44.1.253

 1. Packet ingresses the Net1 logical switch via port PVM1 and will egress
    out of port P1 since the DMAC matches the router port's mac address.

 [OUT P1]: m_vm1, 10.1.1.3 -> m_net1, 198.44.1.253
 [IN RP1]: m_vm1, 10.1.1.3 -> m_net1, 198.44.1.253

 2. Packet ingresses logical router via RP1 and passes through the
    security rules.
 3. Packet gets processed in IP_DNAT stage where the priority 0 rule will
    be hit, since the DIP does not match any of the FIPs. The packet is
    passed on to the next stage without any modifications.
 4. Packet gets processed in the ROUTING stage and the SMAC is changed to
    m_pn1 and the outport is set to RP3.
 5. Packet enters IP_SNAT stage where the priority 90 rule will be hit,
    which changes the SMAC from m_pn1 to m_fip1 and the SIP from
    10.1.1.3 (VM1) to 198.44.1.3 (FIP1).

 m_fip1, 198.44.1.3 -> m_net1, 198.44.1.253

 6. Packet gets processed in ARP_RESOLVE stage and once the arp is resolved
    for 198.44.1.253, the DMAC of the packet will be changed to m_eh and
    the packet will egress out of the router via port RP3.

 [OUT RP3]:   m_fip1, 198.44.1.3 -> m_eh, 198.44.1.253
 [IN  P3 ]:   m_fip1, 198.44.1.3 -> m_eh, 198.44.1.253

 7. Packet ingresses the provider network logical switch via port P3. The
    ingress security rules on port P3 allow packets with a SMAC that
    matches port P3's mac or the mac of one of the FIPs. Finally, after
    the packet passes through the rest of the stages, it egresses out of
    the localnet port.

 [OUT localnet]: m_fip1, 198.44.1.3 -> m_eh, 198.44.1.253

    External Host 198.44.1.253 responds to 198.44.1.3

 [IN  localnet]: m_eh, 198.44.1.253 -> m_fip1, 198.44.1.3

 1. Packet ingresses the provider network logical switch via the localnet
    port. In the L2_LKUP stage the priority 50 rule matches the DMAC of
    the packet against the mac of FIP1 and sets the outport to P3. The
    packet egresses the provider network logical switch via port P3. The
    egress security rules on port P3 allow packets with a DMAC that
    matches the mac of port P3 or the mac of one of the FIPs.

 [OUT  P3]: m_eh, 198.44.1.253 -> m_fip1, 198.44.1.3
 [IN  RP3]: m_eh, 198.44.1.253 -> m_fip1, 198.44.1.3

 2. Packet ingresses the logical router via port RP3. The ingress security
    rules on port RP3 allow packets with a DMAC that matches the mac of
    port RP3 or the mac of one of the FIPs.
 3. Packet gets processed in IP_DNAT stage, where the priority 90 rule will
    be hit, which will change the destination IP from 198.44.1.3 to
    10.1.1.3.

 m_eh, 198.44.1.253 -> m_fip1, 10.1.1.3

 4. Packet gets processed in the ROUTING stage and the SMAC is changed to
    m_net1 and the outport is set to RP1.

 m_net1, 198.44.1.253 -> m_fip1, 10.1.1.3

 5. Packet gets processed in IP_SNAT stage where the priority 0 rule will
    be hit, since the source IP does not match any of the VM's that have a
    FIP. The packet will be sent to the next stage without any changes.
 6. Packet gets processed in ARP_RESOLVE stage where the DMAC of the
    packet will be changed to m_vm1.

 m_net1, 198.44.1.253 -> m_vm1, 10.1.1.3

 7. Packet egresses out of the logical router via port RP1 and ingresses
    the Net1 logical switch via port P1. The logical switch processes the
    packet and sends it out of the VM port PVM1.

 [OUT PVM1]: m_net1, 198.44.1.253 -> m_vm1, 10.1.1.3

________________________________________________
Scenario 2: East <--> West via FIP (VM1 -> FIP3)
________________________________________________

 [IN PVM1]: m_vm1, 10.1.1.3 -> m_net1, 198.44.1.5

 1. Packet ingresses the Net1 logical switch on port PVM1 and will egress
    out of port P1 since the DMAC matches the router port's mac address.

 [OUT P1]: m_vm1, 10.1.1.3 -> m_net1, 198.44.1.5
 [IN RP1]: m_vm1, 10.1.1.3 -> m_net1, 198.44.1.5

 2. Packet ingresses the logical router on port RP1 and passes through the
    ingress security rules.
 3. Packet gets processed in IP_DNAT stage where the priority 90 rule will
    be hit, since the DIP of the packet matches the FIP3 IP. The DIP of the
    packet will be changed from 198.44.1.5 (FIP3 IP) to 10.1.2.5 (VM3 IP).

 m_vm1, 10.1.1.3 -> m_net1, 10.1.2.5

 4. Packet gets processed in the ROUTING stage and the SMAC is changed to
    m_net2 and the outport is set to RP2.

 m_net2, 10.1.1.3 -> m_net1, 10.1.2.5

 5. Packet enters IP_SNAT stage where the priority 50 rule will be hit and
    the SIP is changed from 10.1.1.3(VM1 IP) to 198.44.1.3(FIP1 IP).
    Priority 90 rules do not match here, since the outport is not RP3.

 m_net2, 198.44.1.3 -> m_net1, 10.1.2.5

 6. Packet gets processed in ARP_RESOLVE stage where the priority 100 rule
    will be hit and the DMAC of the packet is changed to VM3's mac (m_vm3).

 m_net2, 198.44.1.3 -> m_vm3, 10.1.2.5

 7. Packet egresses out of the logical router via port RP2.

 [OUT RP2]: m_net2, 198.44.1.3 -> m_vm3, 10.1.2.5
 [IN  P2 ]: m_net2, 198.44.1.3 -> m_vm3, 10.1.2.5

 8. The packet ingresses Net2 logical switch on port P2 and after further
    processing, the packet will be sent out of port PVM3 to VM3.

 [OUT PVM3]: m_net2, 198.44.1.3 -> m_vm3, 10.1.2.5

    VM 10.1.2.5 responds to 198.44.1.3

 [IN PVM3]: m_vm3, 10.1.2.5  -> m_net2, 198.44.1.3

 1. Packet ingresses the Net2 logical switch on port PVM3 and will egress
    out of port P2 since the DMAC matches the router port's mac address.

 [OUT P2]: m_vm3, 10.1.2.5  -> m_net2, 198.44.1.3
 [IN RP2]: m_vm3, 10.1.2.5  -> m_net2, 198.44.1.3

 2. Packet ingresses logical router via port RP2 and passes through the
    ingress security rules.
 3. Packet gets processed in IP_DNAT stage, where the priority 90 rule will
    be hit and the DIP is changed from 198.44.1.3 (FIP1 IP) to 10.1.1.3
   (VM1 IP).

 m_vm3, 10.1.2.5  -> m_net2, 10.1.1.3

 4. Packet gets processed in the ROUTING stage and the SMAC is changed to
    m_net1 and the outport is set to RP1.

 m_net1, 10.1.2.5  -> m_net2, 10.1.1.3

 5. Packet enters IP_SNAT stage where the priority 50 rule will be hit and
    the src ip is changed from 10.1.2.5(VM3 IP) to 198.44.1.5(FIP3 IP).
    The priority 90 rule does not match in this stage since the outport is
    not RP3.

 m_net1, 198.44.1.5  -> m_net2, 10.1.1.3

 6. Packet gets processed in ARP_RESOLVE stage where the DMAC of the packet
    will be changed to m_vm1.

 m_net1, 198.44.1.5  -> m_vm1, 10.1.1.3

 7. Packet egresses out of the logical router via port RP1.

 [OUT RP1]: m_net1, 198.44.1.5  -> m_vm1, 10.1.1.3
 [IN   P1]: m_net1, 198.44.1.5  -> m_vm1, 10.1.1.3

 8. Packet ingresses the Net1 logical switch via port P1 and after further
    processing the packet will be sent out of port PVM1.

 [OUT PVM1]: m_net1, 198.44.1.5  -> m_vm1, 10.1.1.3

_________________________________________________________
Scenario 3: East <--> West via private IPs (VM1 <--> VM3)
_________________________________________________________

 [IN PVM1]: m_vm1, 10.1.1.3 -> m_net1, 10.1.2.5

 1. Packet ingresses the Net1 logical switch on port PVM1 and will egress
    out of port P1 since the DMAC matches the router port's mac address.

 [OUT P1]: m_vm1, 10.1.1.3 -> m_net1, 10.1.2.5
 [IN RP1]: m_vm1, 10.1.1.3 -> m_net1, 10.1.2.5

 2. Packet ingresses the logical router via RP1 and passes through the
    ingress security rules.
 3. Packet gets processed in IP_DNAT stage where the priority 100 rule will
    be hit and reg2 is set to 0x1. The priority 100 rules are programmed to
    match the East-West packets that are not using FIPs to communicate.
 4. Packet gets processed in the ROUTING stage and the SMAC is changed to
    m_net2 and the outport is set to RP2.

 m_net2, 10.1.1.3 -> m_net1, 10.1.2.5

 5. Packet gets processed in IP_SNAT stage where the priority 100 rule will
    be hit, since it matches on reg2, which was set to 0x1 in the IP_DNAT
    stage. No modification is done to the packet in this stage (a sketch of
    how these paired priority-100 rules are generated follows at the end of
    this scenario).
 6. Packet gets processed in ARP_RESOLVE stage where the priority 100 rule
    will be hit and the DMAC of the packet is changed to m_vm3.

 m_net2, 10.1.1.3 -> m_vm3, 10.1.2.5

 7. Packet egresses out of the logical router via port RP2.

 [OUT RP2]: m_net2, 10.1.1.3 -> m_vm3, 10.1.2.5
 [IN   P2]: m_net2, 10.1.1.3 -> m_vm3, 10.1.2.5

 8. Packet ingresses the Net2 logical switch on port P2 and after further
    processing, the packet will egress out of port PVM3.

 [OUT PVM3]: m_net2, 10.1.1.3 -> m_vm3, 10.1.2.5

        VM 10.1.2.5 responds to VM 10.1.1.3

 [IN PVM3]: m_vm3, 10.1.2.5 -> m_net2, 10.1.1.3

 The return path from 10.1.2.5 --> 10.1.1.3 will follow the exact same
 path as outlined in steps 1-8 above. The packet is not altered in the
 IP_DNAT and the IP_SNAT stages.

 [OUT PVM1]: m_net1, 10.1.2.5 -> m_vm1, 10.1.1.3
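
 (The sketch mentioned in step 5 above: how the paired priority-100 rules
 are generated, condensed from the ovn-northd.c hunk in the quoted patch.
 ip1/mask1 and ip2/mask2 are the parsed networks of two distinct
 non-gateway router ports, and od is the logical router datapath:)

     /* Mark east-west traffic between two private networks on the same
      * router so that the SNAT stage leaves it untouched. */
     char *match = xasprintf("(ip4.src == "IP_FMT"/"IP_FMT") && "
                             "(ip4.dst == "IP_FMT"/"IP_FMT")",
                             IP_ARGS(ip1 & mask1), IP_ARGS(mask1),
                             IP_ARGS(ip2 & mask2), IP_ARGS(mask2));
     ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_DNAT, 100, match,
                   "reg2 = 1; next;");
     free(match);

     /* ...and the matching skip rule in the SNAT stage. */
     ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_SNAT, 100,
                   "reg2 == 1", "next;");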

_______________________________________________________________________
Scenario 4: East <--> West via FIP, both endpoints on the same logical
switch. (VM1 <--> FIP2 (VM2))
_______________________________________________________________________

 [IN PVM1]: m_vm1, 10.1.1.3 -> m_net1, 198.44.1.4

 1. Packet ingresses the Net1 logical switch on port PVM1 and will egress
    out of port P1 since the DMAC matches the router port's mac address.

 [OUT P1]: m_vm1, 10.1.1.3 -> m_net1, 198.44.1.4
 [IN RP1]: m_vm1, 10.1.1.3 -> m_net1, 198.44.1.4

 2. Packet ingresses the logical router on port RP1 and passes through the
    ingress security rules.
 3. Packet gets processed in IP_DNAT stage where the priority 90 rule will
    be hit, since the DIP of the packet matches the FIP2 IP. The DIP of the
    packet will be changed from 198.44.1.4 (FIP2 IP) to 10.1.1.4 (VM2 IP).
    The rule also resets the inport, since this packet should egress out
    on the same interface it ingressed on.

 m_vm1, 10.1.1.3 -> m_net1, 10.1.1.4 (inport = " ")

 4. Packet gets processed in the ROUTING stage and the SMAC is changed to
    m_net1 and the outport is set to RP1.

 m_net1, 10.1.1.3 -> m_net1, 10.1.1.4 (inport = " ")

 5. Packet enters IP_SNAT stage where the priority 50 rule will be hit and
    the SIP is changed from 10.1.1.3(VM1 IP) to 198.44.1.3(FIP1 IP).
    Priority 90 rules do not match here, since the outport is not RP3.

 m_net1, 198.44.1.3 -> m_net1, 10.1.1.4

 6. Packet gets processed in ARP_RESOLVE stage where the priority 100 rule
    will be hit and the DMAC of the packet is changed to VM2 mac (m_vm2)

 m_net1, 198.44.1.3 -> m_vm2, 10.1.1.4

 7. Packet egresses out of the logical router via port RP1. The egress
    stage drops packets whose inport matches the outport, but in this case
    the inport was reset in the IP_DNAT stage, so the packet egresses out.

 [OUT RP1]: m_net1, 198.44.1.3 -> m_vm2, 10.1.1.4
 [IN  P1 ]: m_net1, 198.44.1.3 -> m_vm2, 10.1.1.4

 8. The packet ingresses Net1 logical switch on port P1 and after further
    processing, the packet will be sent out of port PVM2 to VM2.

 [OUT PVM2]: m_net1, 198.44.1.3 -> m_vm2, 10.1.1.4

    VM 10.1.1.4 responds to 198.44.1.3

 [IN PVM2]: m_vm2, 10.1.1.4 -> m_net1, 198.44.1.3

 The return path from 10.1.1.4 --> 198.44.1.3 will follow the exact same
 path as outlined in steps 1-8 above. The DIP gets modified in the IP_DNAT
 stage from 198.44.1.3 to 10.1.1.3 and the inport is reset. In the IP_SNAT
 stage the SIP gets modified from 10.1.1.4 to 198.44.1.4.

 [OUT PVM1]: m_net1, 198.44.1.4 -> m_vm1, 10.1.1.3

Guru Shetty <g...@ovn.org> wrote on 03/25/2016 10:56:20 AM:

> From: Guru Shetty <g...@ovn.org>
> To: Chandra Sekhar Vejendla/San Jose/IBM@IBMUS
> Cc: ovs dev <dev@openvswitch.org>
> Date: 03/25/2016 10:56 AM
> Subject: Re: [ovs-dev] [PATCH RFC] OVN: Openstack floating ip support
>
> It would really help if you give packet walkthrough when following 2
> simultaneous connections happen at the same time.
> 1. east-west without NAT between the private IP addresses
> 2  east-west with floating IP.
>
> You also mentioned in the meeting (if I remember correctly) on how
> you have to keep a particular interface pinned to a hypervisor. A
> little description on that would help.
>
> Also, does this work with overlay networking?
>
> On 22 March 2016 at 14:19, Chandra S Vejendla <csvej...@us.ibm.com>
> wrote:

> This patch adds distributed floating ip support for ovn. The assumption
> made here is that the external network is a single L2 broadcast domain
> and all the chassis have connectivity to the external network.
>
> 2 new tables are added in the LROUTER pipeline IN_IP_DNAT & IP_IN_SNAT.
> IN_IP_DNAT will modify the dst ip of the packet from floating ip to vm ip.
> IN_IP_SNAT will modify the src ip of the packet from vm ip to floating ip.
>
> Rules in IN_IP_DNAT:
> - Priority 100 rule to set the reg2 to 0x1 if dst & src networks are
>   connectected via a router and both the networks are private.
> - Priority 90 rule to modify the dst ip from floating ip to vm ip.
> - Priority 0 rule to go to next table.
>
> Rules in IN_IP_SNAT:
> - Priority 100 rule to skip modifying the src ip when reg2 is set to 0x1
> - Priority 90 rule to modify the src ip from vm ip to floating ip and dst
>   mac to floating ip port mac if the packet is egressing via the gateway
>   port
> - Priority 50 rule to modify the src ip from vm ip to floating ip
> - Priority 0 rule to go to next table.
>
> Priority 100 rules in IN_IP_DNAT and IN_IP_SNAT serves 2 purposes.
> - Avoid NAT when vms in different LSWITCHES connected via a LROUTER talk
>   to each other using private ips.
> - When 2 VMs connected to the same LSWITCH or different LSWITCHES
>   connected via a router try to talk to each other, the dst ip of the
>   packet should first be DNATed and then the src ip should be SNATed.
>
> The initial design was to stage DNAT in the ingress pipeline and the SNAT
> in the egress pipeline, but now both the stages are in the ingress
> pipeline. This was done to solve the cases highlighted above [Priority
> 100 rules]. There is a need to use information from DNAT stage when SNAT
> is being processed. This would require an explicit register to be burnt
> to store the information.
>
> Flows modified in the LSWITCH pipeline
>
> Rules in IN_PORT_SEC:
> - Priority 50 rule to allow packets ingressing the LSWITCH router port
>   with a src mac of floating ip port
>
> Rules in ARP_RSP:
> - Priority 150 rule to respond to arp request for floating ip. To prevent
>   arp responses for floating ip's from all the chassis, "lport" option is
>   set in the external_id's column of the lflow table. lport will point to
>   the vif-id of the vm that is associated with the floating ip. When
>   ovn-controller is processing the flows, if it sees an lport option set
>   in the external_ids column, it will install this lflow only if the
>   lport is a local port on the chassis.
>
> Rules in L2_LKUP:
> - Priority 50 rule to set the outport to the lrouter port when the dst
>   mac matches the floating ip mac
>
> Rules in OUT_PORT_SEC:
> - Priority 50 rule to allow packet egressing the lrouter port with a mac
>   of a floating ip port.
>
> Had to increase MAX_RESUBMIT_RECURSION from 64 to 96. When 2 VMs
> connected via vm1->LS->LR->LS->LR->LS->vm2 are trying to talk to each
> other, the resubmits are exceeding the existing 64 limit.
>
> When a floating ip is associated with a VM ip, NB will set the options of
> the floating ip lport to "fixed-ip-port=<lport of vif>, router-port=<lport
> of the logical router port".
>
> If you want to try out this patch with openstack, add the following patch
> [1] to networking-ovn.
>
> [1] https://review.openstack.org/#/c/295547/
> ---
>  ofproto/ofproto-dpif-xlate.c    |   2 +-
>  ovn/controller/binding.c        |  24 ++-
>  ovn/controller/binding.h        |   4 +-
>  ovn/controller/lflow.c          |  21 ++-
>  ovn/controller/lflow.h          |   3 +-
>  ovn/controller/ovn-controller.c |   7 +-
>  ovn/northd/ovn-northd.c         | 360 +++++++++++++++++++++++++++++
> ++++++++---
>  7 files changed, 378 insertions(+), 43 deletions(-)
>
> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> index 67504e8..4a5aae2 100644
> --- a/ofproto/ofproto-dpif-xlate.c
> +++ b/ofproto/ofproto-dpif-xlate.c
> @@ -68,7 +68,7 @@ VLOG_DEFINE_THIS_MODULE(ofproto_dpif_xlate);
>
>  /* Maximum depth of flow table recursion (due to resubmit actions) in a
>   * flow translation. */
> -#define MAX_RESUBMIT_RECURSION 64
> +#define MAX_RESUBMIT_RECURSION 96
>  #define MAX_INTERNAL_RESUBMITS 1   /* Max resbmits allowed using rules
in
>                                        internal table. */
>
> diff --git a/ovn/controller/binding.c b/ovn/controller/binding.c
> index d3ca9c9..f4e0f4a 100644
> --- a/ovn/controller/binding.c
> +++ b/ovn/controller/binding.c
> @@ -49,7 +49,7 @@ binding_register_ovs_idl(struct ovsdb_idl *ovs_idl)
>                           &ovsrec_interface_col_ingress_policing_burst);
>  }
>
> -static void
> +void
>  get_local_iface_ids(const struct ovsrec_bridge *br_int, struct shash
*lports)
>  {
>      int i;
> @@ -149,7 +149,8 @@ update_qos(const struct ovsrec_interface *iface_rec,
>  void
>  binding_run(struct controller_ctx *ctx, const struct ovsrec_bridge
*br_int,
>              const char *chassis_id, struct simap *ct_zones,
> -            unsigned long *ct_zone_bitmap, struct hmap *local_datapaths)
> +            unsigned long *ct_zone_bitmap, struct hmap *local_datapaths,
> +            struct sset *all_lports)
>  {
>      const struct sbrec_chassis *chassis_rec;
>      const struct sbrec_port_binding *binding_rec;
> @@ -167,10 +168,9 @@ binding_run(struct controller_ctx *ctx, const
> struct ovsrec_bridge *br_int,
>           * We'll remove our chassis from all port binding records below.
*/
>      }
>
> -    struct sset all_lports = SSET_INITIALIZER(&all_lports);
>      struct shash_node *node;
>      SHASH_FOR_EACH (node, &lports) {
> -        sset_add(&all_lports, node->name);
> +        sset_add(all_lports, node->name);
>      }
>
>      /* Run through each binding record to see if it is resident on this
> @@ -181,10 +181,10 @@ binding_run(struct controller_ctx *ctx, const
> struct ovsrec_bridge *br_int,
>              = shash_find_and_delete(&lports, binding_rec->logical_port);
>          if (iface_rec
>              || (binding_rec->parent_port && binding_rec->parent_port[0]
&&
> -                sset_contains(&all_lports, binding_rec->parent_port))) {
> +                sset_contains(all_lports, binding_rec->parent_port))) {
>              if (binding_rec->parent_port && binding_rec->parent_port[0])
{
>                  /* Add child logical port to the set of all local ports.
*/
> -                sset_add(&all_lports, binding_rec->logical_port);
> +                sset_add(all_lports, binding_rec->logical_port);
>              }
>              add_local_datapath(local_datapaths, binding_rec);
>              if (iface_rec && ctx->ovs_idl_txn) {
> @@ -217,7 +217,14 @@ binding_run(struct controller_ctx *ctx, const
> struct ovsrec_bridge *br_int,
>               * to list them in all_lports because we want to allocate
>               * a conntrack zone ID for each one, as we'll be creating
>               * a patch port for each one. */
> -            sset_add(&all_lports, binding_rec->logical_port);
> +            sset_add(all_lports, binding_rec->logical_port);
> +        }
> +        else if (!binding_rec->chassis
> +                           && !strcmp(binding_rec->type, "floating-ip"))
{
> +            const char *peer = smap_get(&binding_rec->options, "peer");
> +            if (peer && sset_contains(all_lports, peer)) {
> +                    add_local_datapath(local_datapaths, binding_rec);
> +            }
>          }
>      }
>
> @@ -225,10 +232,9 @@ binding_run(struct controller_ctx *ctx, const
> struct ovsrec_bridge *br_int,
>          VLOG_DBG("No port binding record for lport %s", node->name);
>      }
>
> -    update_ct_zones(&all_lports, ct_zones, ct_zone_bitmap);
> +    update_ct_zones(all_lports, ct_zones, ct_zone_bitmap);
>
>      shash_destroy(&lports);
> -    sset_destroy(&all_lports);
>  }
>
>  /* Returns true if the database is all cleaned up, false if more work is
> diff --git a/ovn/controller/binding.h b/ovn/controller/binding.h
> index 6e19c10..73e6b0c 100644
> --- a/ovn/controller/binding.h
> +++ b/ovn/controller/binding.h
> @@ -24,11 +24,13 @@ struct hmap;
>  struct ovsdb_idl;
>  struct ovsrec_bridge;
>  struct simap;
> +struct sset;
>
>  void binding_register_ovs_idl(struct ovsdb_idl *);
>  void binding_run(struct controller_ctx *, const struct ovsrec_bridge
*br_int,
>                   const char *chassis_id, struct simap *ct_zones,
> -                 unsigned long *ct_zone_bitmap, struct hmap
> *local_datapaths);
> +                 unsigned long *ct_zone_bitmap, struct hmap
*local_datapaths,
> +                 struct sset *all_lports);
>  bool binding_cleanup(struct controller_ctx *, const char *chassis_id);
>
>  #endif /* ovn/binding.h */
> diff --git a/ovn/controller/lflow.c b/ovn/controller/lflow.c
> index 0614a54..a59d26f 100644
> --- a/ovn/controller/lflow.c
> +++ b/ovn/controller/lflow.c
> @@ -16,6 +16,7 @@
>  #include <config.h>
>  #include "lflow.h"
>  #include "lport.h"
> +#include "lib/sset.h"
>  #include "openvswitch/dynamic-string.h"
>  #include "ofctrl.h"
>  #include "ofp-actions.h"
> @@ -198,7 +199,8 @@ static void
>  add_logical_flows(struct controller_ctx *ctx, const struct
> lport_index *lports,
>                    const struct mcgroup_index *mcgroups,
>                    const struct hmap *local_datapaths,
> -                  const struct simap *ct_zones, struct hmap *flow_table)
> +                  const struct simap *ct_zones, struct hmap *flow_table,
> +                  struct sset *local_ports)
>  {
>      uint32_t conj_id_ofs = 1;
>
> @@ -240,6 +242,18 @@ add_logical_flows(struct controller_ctx *ctx,
> const struct lport_index *lports,
>              }
>          }
>
> +        /* The following check is specifically for floating-ip ports.
> +         * This will prevent from installing the arp request rule for
> +         * floating ip, unless the lport in the flow points to a local
> +         * port which is a resident on this chassis */
> +        const char *lport = smap_get(&lflow->external_ids, "lport");
> +        if (lport) {
> +            if (!sset_contains(local_ports, lport)) {
> +                continue;
> +            }
> +        }
> +
> +
>          /* Determine translation of logical table IDs to physical
> table IDs. */
>          uint8_t first_ptable = (ingress
>                                  ? OFTABLE_LOG_INGRESS_PIPELINE
> @@ -416,10 +430,11 @@ void
>  lflow_run(struct controller_ctx *ctx, const struct lport_index *lports,
>            const struct mcgroup_index *mcgroups,
>            const struct hmap *local_datapaths,
> -          const struct simap *ct_zones, struct hmap *flow_table)
> +          const struct simap *ct_zones, struct hmap *flow_table,
> +          struct sset *local_ports)
>  {
>      add_logical_flows(ctx, lports, mcgroups, local_datapaths,
> -                      ct_zones, flow_table);
> +                      ct_zones, flow_table, local_ports);
>      add_neighbor_flows(ctx, lports, flow_table);
>  }
>
> diff --git a/ovn/controller/lflow.h b/ovn/controller/lflow.h
> index ff823d4..3147e5c 100644
> --- a/ovn/controller/lflow.h
> +++ b/ovn/controller/lflow.h
> @@ -41,6 +41,7 @@ struct lport_index;
>  struct mcgroup_index;
>  struct simap;
>  struct uuid;
> +struct sset;
>
>  /* OpenFlow table numbers.
>   *
> @@ -63,7 +64,7 @@ void lflow_run(struct controller_ctx *, const
> struct lport_index *,
>                 const struct mcgroup_index *,
>                 const struct hmap *local_datapaths,
>                 const struct simap *ct_zones,
> -               struct hmap *flow_table);
> +               struct hmap *flow_table, struct sset *local_ports);
>  void lflow_destroy(void);
>
>  #endif /* ovn/lflow.h */
> diff --git a/ovn/controller/ovn-controller.c
b/ovn/controller/ovn-controller.c
> index e52b731..3e0b8e3 100644
> --- a/ovn/controller/ovn-controller.c
> +++ b/ovn/controller/ovn-controller.c
> @@ -33,6 +33,7 @@
>  #include "encaps.h"
>  #include "fatal-signal.h"
>  #include "hmap.h"
> +#include "sset.h"
>  #include "lflow.h"
>  #include "lib/vswitch-idl.h"
>  #include "lport.h"
> @@ -284,12 +285,13 @@ main(int argc, char *argv[])
>
>          const struct ovsrec_bridge *br_int = get_br_int(&ctx);
>          const char *chassis_id = get_chassis_id(ctx.ovs_idl);
> +        struct sset local_ports = SSET_INITIALIZER(&local_ports);
>
>          if (chassis_id) {
>              chassis_run(&ctx, chassis_id);
>              encaps_run(&ctx, br_int, chassis_id);
>              binding_run(&ctx, br_int, chassis_id, &ct_zones,
ct_zone_bitmap,
> -                    &local_datapaths);
> +                    &local_datapaths, &local_ports);
>          }
>
>          if (br_int) {
> @@ -306,7 +308,8 @@ main(int argc, char *argv[])
>
>              struct hmap flow_table = HMAP_INITIALIZER(&flow_table);
>              lflow_run(&ctx, &lports, &mcgroups, &local_datapaths,
> -                      &ct_zones, &flow_table);
> +                      &ct_zones, &flow_table, &local_ports);
> +            sset_destroy(&local_ports);
>              if (chassis_id) {
>                  physical_run(&ctx, mff_ovn_geneve,
>                               br_int, chassis_id, &ct_zones, &flow_table,
> diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
> index 598bbe3..12e7ebd 100644
> --- a/ovn/northd/ovn-northd.c
> +++ b/ovn/northd/ovn-northd.c
> @@ -102,9 +102,11 @@ enum ovn_stage {
>      /* Logical router ingress stages. */                              \
>      PIPELINE_STAGE(ROUTER, IN,  ADMISSION,   0, "lr_in_admission")    \
>      PIPELINE_STAGE(ROUTER, IN,  IP_INPUT,    1, "lr_in_ip_input")     \
> -    PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,  2, "lr_in_ip_routing")   \
> -    PIPELINE_STAGE(ROUTER, IN,  ARP_RESOLVE, 3, "lr_in_arp_resolve")  \
> -    PIPELINE_STAGE(ROUTER, IN,  ARP_REQUEST, 4, "lr_in_arp_request")  \
> +    PIPELINE_STAGE(ROUTER, IN,  IP_DNAT,     2, "lr_in_ip_dnat")      \
> +    PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,  3, "lr_in_ip_routing")   \
> +    PIPELINE_STAGE(ROUTER, IN,  IP_SNAT,     4, "lr_in_ip_snat")      \
> +    PIPELINE_STAGE(ROUTER, IN,  ARP_RESOLVE, 5, "lr_in_arp_resolve")  \
> +    PIPELINE_STAGE(ROUTER, IN,  ARP_REQUEST, 6, "lr_in_arp_request")  \
>                                                                        \
>      /* Logical router egress stages. */                               \
>      PIPELINE_STAGE(ROUTER, OUT, DELIVERY,    0, "lr_out_delivery")
> @@ -479,6 +481,7 @@ struct ovn_port {
>      ovs_be32 ip, mask;          /* 192.168.10.123/24. */
>      ovs_be32 network;           /* 192.168.10.0. */
>      ovs_be32 bcast;             /* 192.168.10.255. */
> +    ovs_be32 fixed_ip;          /* fixed-ip for floating-ip */
>      struct eth_addr mac;
>      struct ovn_port *peer;
>
> @@ -541,6 +544,20 @@ ovn_port_allocate_key(struct ovn_datapath *od)
>                            (1u << 15) - 1, &od->port_key_hint);
>  }
>
> +static const char *
> +get_router_port_for_floating_ip(struct ovn_port *op, struct hmap *ports)
> +{
> +    const char *lrp_name = smap_get(&op->nbs->options, "router-port");
> +    if (lrp_name) {
> +        struct ovn_port *lrp = ovn_port_find(ports, lrp_name);
> +        if (lrp && lrp->nbs)
> +        {
> +            return lrp->json_key;
> +        }
> +    }
> +    return op->json_key;
> +}
> +
>  static void
>  join_logical_ports(struct northd_context *ctx,
>                     struct hmap *datapaths, struct hmap *ports,
> @@ -671,10 +688,35 @@ join_logical_ports(struct northd_context *ctx,
>              op->peer = ovn_port_find(ports, op->nbr->name);
>          }
>      }
> +
> +    HMAP_FOR_EACH (op, key_node, ports) {
> +        if (op->nbs && !strcmp(op->nbs->type, "floating-ip")) {
> +            const char *peer_name = smap_get(&op->nbs->options,
> +                                             "fixed-ip-port");
> +            if (!peer_name) {
> +                continue;
> +            }
> +
> +            struct ovn_port *peer = ovn_port_find(ports, peer_name);
> +            if (!peer || !peer->nbs) {
> +                continue;
> +            }
> +            struct eth_addr mac;
> +            ovs_be32 ip;
> +
> +            /* Not sure if a port with multiple IP addresses can be
> +             * mapped to a floating-ip. For now, just using first ip */
> +            if (ovs_scan(peer->nbs->addresses[0],
> +                     ETH_ADDR_SCAN_FMT" "IP_SCAN_FMT,
> +                     ETH_ADDR_SCAN_ARGS(mac), IP_SCAN_ARGS(&ip))) {
> +                op->fixed_ip = ip;
> +            }
> +        }
> +    }
>  }
>
>  static void
> -ovn_port_update_sbrec(const struct ovn_port *op)
> +ovn_port_update_sbrec(const struct ovn_port *op, struct hmap *ports)
>  {
>      sbrec_port_binding_set_datapath(op->sb, op->od->sb);
>      if (op->nbr) {
> @@ -688,7 +730,20 @@ ovn_port_update_sbrec(const struct ovn_port *op)
>          sbrec_port_binding_set_tag(op->sb, NULL, 0);
>          sbrec_port_binding_set_mac(op->sb, NULL, 0);
>      } else {
> -        if (strcmp(op->nbs->type, "router")) {
> +        if (op->nbs && !strcmp(op->nbs->type, "floating-ip")) {
> +            const char *peer_name = smap_get(&op->nbs->options,
> +                                             "fixed-ip-port");
> +            if (peer_name) {
> +                struct ovn_port *peer = ovn_port_find(ports, peer_name);
> +                if (peer) {
> +                    const struct smap ids = SMAP_CONST1(&ids, "peer",
> +                                             peer_name);
> +                    sbrec_port_binding_set_options(op->sb, &ids);
> +                }
> +            }
> +            sbrec_port_binding_set_type(op->sb, op->nbs->type);
> +        }
> +        else if (strcmp(op->nbs->type, "router")) {
>              sbrec_port_binding_set_type(op->sb, op->nbs->type);
>              sbrec_port_binding_set_options(op->sb, &op->nbs->options);
>          } else {
> @@ -727,7 +782,7 @@ build_ports(struct northd_context *ctx, struct
> hmap *datapaths,
>       * record based on northbound data.  Also index the in-use
> tunnel_keys. */
>      struct ovn_port *op, *next;
>      LIST_FOR_EACH_SAFE (op, next, list, &both) {
> -        ovn_port_update_sbrec(op);
> +        ovn_port_update_sbrec(op, ports);
>
>          add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
>          if (op->sb->tunnel_key > op->od->port_key_hint) {
> @@ -743,7 +798,7 @@ build_ports(struct northd_context *ctx, struct
> hmap *datapaths,
>          }
>
>          op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
> -        ovn_port_update_sbrec(op);
> +        ovn_port_update_sbrec(op, ports);
>
>          sbrec_port_binding_set_logical_port(op->sb, op->key);
>          sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
> @@ -869,6 +924,8 @@ struct ovn_lflow {
>      uint16_t priority;
>      char *match;
>      char *actions;
> +    char *lport; /* is not null, indicates that the flow should be
installed
> +                    on a chassis if the lport is local to that chassis
*/
>  };
>
>  static size_t
> @@ -900,6 +957,18 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct
> ovn_datapath *od,
>      lflow->priority = priority;
>      lflow->match = match;
>      lflow->actions = actions;
> +    lflow->lport = NULL;
> +}
> +
> +static void
> +ovn_lflow_lport_set(struct ovn_lflow *lflow, const char *lport_name)
> +{
> +    if (lport_name) {
> +        lflow->lport = xstrdup(lport_name);
> +    }
> +    else {
> +        lflow->lport = NULL;
> +    }
>  }
>
>  /* Adds a row with the specified contents to the Logical_Flow table. */
> @@ -1155,7 +1224,8 @@ build_port_security_ipv6_flow(
>   *   - Priority 80 flow to drop ARP and IPv6 ND packets.
>   */
>  static void
> -build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
> +build_port_security_nd(struct ovn_port *op, struct hmap *lflows,
> +                       struct hmap *ports)
>  {
>      for (size_t i = 0; i < op->nbs->n_port_security; i++) {
>          struct lport_addresses ps;
> @@ -1168,11 +1238,19 @@ build_port_security_nd(struct ovn_port *op,
> struct hmap *lflows)
>
>          bool no_ip = !(ps.n_ipv4_addrs || ps.n_ipv6_addrs);
>          struct ds match = DS_EMPTY_INITIALIZER;
> +
> +        const char *inport = NULL;
> +        if (!strcmp(op->nbs->type, "floating-ip")) {
> +            inport = get_router_port_for_floating_ip(op, ports);
> +        }
> +        else {
> +            inport = op->json_key;
> +        }
>
>          if (ps.n_ipv4_addrs || no_ip) {
>              ds_put_format(
>                  &match, "inport == %s && eth.src == "ETH_ADDR_FMT"
> && arp.sha == "
> -                ETH_ADDR_FMT, op->json_key, ETH_ADDR_ARGS(ps.ea),
> +                ETH_ADDR_FMT, inport, ETH_ADDR_ARGS(ps.ea),
>                  ETH_ADDR_ARGS(ps.ea));
>
>              if (ps.n_ipv4_addrs) {
> @@ -1228,7 +1306,7 @@ build_port_security_nd(struct ovn_port *op,
> struct hmap *lflows)
>   */
>  static void
>  build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
> -                       struct hmap *lflows)
> +                       struct hmap *lflows, struct hmap *ports)
>  {
>      char *port_direction;
>      enum ovn_stage stage;
> @@ -1250,16 +1328,25 @@ build_port_security_ip(enum ovn_pipeline
> pipeline, struct ovn_port *op,
>              continue;
>          }
>
> +        const char *port = NULL;
> +        if (!strcmp(op->nbs->type, "floating-ip")) {
> +            port = get_router_port_for_floating_ip(op, ports);
> +        }
> +        else {
> +            port = op->json_key;
> +        }
> +
> +
>          if (ps.n_ipv4_addrs) {
>              struct ds match = DS_EMPTY_INITIALIZER;
>              if (pipeline == P_IN) {
>                  ds_put_format(&match, "inport == %s && eth.src ==
> "ETH_ADDR_FMT
> -                              " && ip4.src == {0.0.0.0, ", op->json_key,
> +                              " && ip4.src == {0.0.0.0, ", port,
>                                ETH_ADDR_ARGS(ps.ea));
>              } else {
>                  ds_put_format(&match, "outport == %s && eth.dst ==
> "ETH_ADDR_FMT
>                                " && ip4.dst == {255.255.255.255,
224.0.0.0/4
> , ",
> -                              op->json_key, ETH_ADDR_ARGS(ps.ea));
> +                              port, ETH_ADDR_ARGS(ps.ea));
>              }
>
>              for (int i = 0; i < ps.n_ipv4_addrs; i++) {
> @@ -1525,18 +1612,26 @@ build_lswitch_flows(struct hmap *datapaths,
> struct hmap *ports,
>              continue;
>          }
>
> +        const char *inport = NULL;
> +        if (!strcmp(op->nbs->type, "floating-ip")) {
> +            inport = get_router_port_for_floating_ip(op, ports);
> +        }
> +        else {
> +            inport = op->json_key;
> +        }
> +
>          struct ds match = DS_EMPTY_INITIALIZER;
> -        ds_put_format(&match, "inport == %s", op->json_key);
> +        ds_put_format(&match, "inport == %s", inport);
>          build_port_security_l2(
> -            "eth.src", op->nbs->port_security, op->nbs->n_port_security,
> +                       "eth.src", op->nbs->port_security,
> op->nbs->n_port_security,
>              &match);
>          ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
>                        ds_cstr(&match), "next;");
>          ds_destroy(&match);
>
>          if (op->nbs->n_port_security) {
> -            build_port_security_ip(P_IN, op, lflows);
> -            build_port_security_nd(op, lflows);
> +            build_port_security_ip(P_IN, op, lflows, ports);
> +            build_port_security_nd(op, lflows, ports);
>          }
>      }
>
> @@ -1578,10 +1673,26 @@ build_lswitch_flows(struct hmap *datapaths,
> struct hmap *ports,
>           *  - port is up or
>           *  - port type is router
>           */
> -        if (!lport_is_up(op->nbs) && strcmp(op->nbs->type, "router")) {
> +        if (!lport_is_up(op->nbs) && strcmp(op->nbs->type, "router") &&
> +                                     strcmp(op->nbs->type,
"floating-ip")) {
>              continue;
>          }
>
> +        uint16_t priority = 0;
> +        if (!strcmp(op->nbs->type, "floating-ip")) {
> +            const char *peer_name = smap_get(&op->nbs->options,
> +                                             "fixed-ip-port");
> +            if (peer_name) {
> +                priority = 150;
> +            }
> +            else {
> +                priority = 50;
> +            }
> +        }
> +        else {
> +            priority = 50;
> +        }
> +
>          for (size_t i = 0; i < op->nbs->n_addresses; i++) {
>              struct lport_addresses laddrs;
>              if (!extract_lport_addresses(op->nbs->addresses[i], &laddrs,
> @@ -1606,8 +1717,20 @@ build_lswitch_flows(struct hmap *datapaths,
> struct hmap *ports,
>                      ETH_ADDR_ARGS(laddrs.ea),
>                      ETH_ADDR_ARGS(laddrs.ea),
>                      IP_ARGS(laddrs.ipv4_addrs[j].addr));
> -                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_RSP, 50,
> +                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_RSP,
priority,
>                                match, actions);
> +                if (!strcmp(op->nbs->type, "floating-ip")) {
> +                    const char *peer_name = smap_get(&op->nbs->options,
> +                                                     "fixed-ip-port");
> +                    struct ovn_lflow *lflow = ovn_lflow_find(lflows,
op->od,
> +                            S_SWITCH_IN_ARP_RSP, priority, match,
actions);
> +                    /* Setting the lport option in external_ids of
lflow, so
> +                     * that the controller will pick up this flow only
if the
> +                     * lport is a local port on the chassis */
> +                    if (lflow) {
> +                        ovn_lflow_lport_set(lflow, peer_name);
> +                    }
> +                }
>                  free(match);
>                  free(actions);
>              }
> @@ -1662,8 +1785,15 @@ build_lswitch_flows(struct hmap *datapaths,
> struct hmap *ports,
>                  ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
>                                ETH_ADDR_ARGS(mac));
>
> +                const char *outport = NULL;
> +                if (!strcmp(op->nbs->type, "floating-ip")) {
> +                    outport = get_router_port_for_floating_ip(op,
ports);
> +                }
> +                else {
> +                    outport = op->json_key;
> +                }
>                  ds_init(&actions);
> -                ds_put_format(&actions, "outport = %s; output;",
> op->json_key);
> +                ds_put_format(&actions, "outport = %s; output;",
outport);
>                  ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
>                                ds_cstr(&match), ds_cstr(&actions));
>                  ds_destroy(&actions);
> @@ -1722,8 +1852,15 @@ build_lswitch_flows(struct hmap *datapaths,
> struct hmap *ports,
>              continue;
>          }
>
> +        const char *outport = NULL;
> +        if (!strcmp(op->nbs->type, "floating-ip")) {
> +            outport = get_router_port_for_floating_ip(op, ports);
> +        }
> +        else {
> +            outport = op->json_key;
> +        }
>          struct ds match = DS_EMPTY_INITIALIZER;
> -        ds_put_format(&match, "outport == %s", op->json_key);
> +        ds_put_format(&match, "outport == %s", outport);
>          if (lport_is_enabled(op->nbs)) {
>              build_port_security_l2("eth.dst", op->nbs->port_security,
>                                     op->nbs->n_port_security, &match);
> @@ -1737,7 +1874,7 @@ build_lswitch_flows(struct hmap *datapaths,
> struct hmap *ports,
>          ds_destroy(&match);
>
>          if (op->nbs->n_port_security) {
> -            build_port_security_ip(P_OUT, op, lflows);
> +            build_port_security_ip(P_OUT, op, lflows, ports);
>          }
>      }
>  }
> @@ -1819,6 +1956,48 @@ build_lrouter_flows(struct hmap *datapaths,
> struct hmap *ports,
>          free(match);
>      }
>
> +    /* Logical router ingress table 0: match (priority 50).
> +     * The following rules allow packets with mac address
> +     * of floating ip ports ingressing on a logical router port */
> +    HMAP_FOR_EACH (od, key_node, datapaths) {
> +        if (!(od->nbr && od->gateway_port)) {
> +            continue;
> +        }
> +        struct ovn_port *lrp = od->gateway_port->peer;
> +        if (!lrp) {
> +            VLOG_ERR("No peer port for logical router port %s",
> +                        od->gateway_port->key);
> +            continue;
> +        }
> +        const struct nbrec_logical_switch *nbs = lrp->od->nbs;
> +        for (size_t i = 0 ; i < nbs->n_ports ; i++) {
> +            if (nbs->ports[i] && !strcmp(nbs->ports[i]->type,
> "floating-ip")) {
> +                const char *peer_name = smap_get(&nbs->ports[i]->
options,
> +                                                 "fixed-ip-port");
> +                const char *lrp_name = smap_get(&nbs->ports[i]->options,
> +                                                 "router-port");
> +                if (!peer_name || !lrp_name) {
> +                    continue;
> +                }
> +                if (strcmp(lrp_name, lrp->key)) {
> +                    continue;
> +                }
> +                for (size_t j = 0; j < nbs->ports[i]->n_addresses; j++)
{
> +                    struct eth_addr mac;
> +                    char *match;
> +                    if
> (eth_addr_from_string(nbs->ports[i]->addresses[j], &mac)) {
> +                        match = xasprintf("(eth.mcast || eth.dst == "
> +                           ETH_ADDR_FMT") && inport == %s",
> +                           ETH_ADDR_ARGS(mac), od->gateway_port->
json_key);
> +                        ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION,
> +                                      50, match, "next;");
> +                        free(match);
> +                    }
> +                }
> +            }
> +        }
> +    }
> +
>      /* Logical router ingress table 1: IP Input. */
>      HMAP_FOR_EACH (od, key_node, datapaths) {
>          if (!od->nbr) {
> @@ -1928,7 +2107,7 @@ build_lrouter_flows(struct hmap *datapaths,
> struct hmap *ports,
>          free(match);
>      }
>
> -    /* Logical router ingress table 2: IP Routing.
> +    /* Logical router ingress table 3: IP Routing.
>       *
>       * A packet that arrives at this table is an IP packet that should
be
>       * routed to the address in ip4.dst. This table sets outport to
> the correct
> @@ -1953,7 +2132,7 @@ build_lrouter_flows(struct hmap *datapaths,
> struct hmap *ports,
>      }
>      /* XXX destination unreachable */
>
> -    /* Local router ingress table 3: ARP Resolution.
> +    /* Local router ingress table 5: ARP Resolution.
>       *
>       * Any packet that reaches this table is an IP packet whose next-hop
IP
>       * address is in reg0. (ip4.dst is the final destination.) This
table
> @@ -2021,7 +2200,7 @@ build_lrouter_flows(struct hmap *datapaths,
> struct hmap *ports,
>                        "get_arp(outport, reg0); next;");
>      }
>
> -    /* Local router ingress table 4: ARP request.
> +    /* Local router ingress table 6: ARP request.
>       *
>       * In the common case where the Ethernet destination has been
resolved,
>       * this table outputs the packet (priority 100).  Otherwise, it
composes
> @@ -2042,6 +2221,131 @@ build_lrouter_flows(struct hmap *datapaths,
> struct hmap *ports,
>          ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1",
> "output;");
>      }
>
> +    /* DNAT & SNAT tables /
> +     *
> +     * Priority 100 rule in IN_IP_DNAT to set reg2 to 0x1 if dst ip &
> +     * src ip networks are connected via the same router.
> +     *
> +     * Priority 100 rule in IN_IP_SNAT to skip modifying the src ip when
> +     * reg2 is set to 0x1.
> +     *
> +     * Priority 90 rule in IN_IP_DNAT to modify dst ip from floating-ip
> +     * vm-ip.
> +     *
> +     * Priority 90 rule in IN_IP_SNAT to modify src ip from vm ip to
> +     * floating ip and dst mac to floating ip port mac if the packet is
> +     * egressing via the gateway port.
> +     *
> +     * Priority 50 rule in IP_IP_SNAT to modify src ip from vm ip to
> +     * floating ip.
> +     *
> +     * Pririty 0 rule to go to next table if none of the above rules
match.
> +     */
> +
> +    HMAP_FOR_EACH (op, key_node, ports) {
> +        if (!(op->nbs && !strcmp(op->nbs->type, "floating-ip"))) {
> +            continue;
> +        }
> +        const char *peer_name = smap_get(&op->nbs->options,
"fixed-ip-port");
> +        const char *lrp_name = smap_get(&op->nbs->options,
"router-port");
> +        if (!peer_name || !lrp_name) {
> +            continue;
> +        }
> +        struct ovn_port *lrp = ovn_port_find(ports, lrp_name);
> +        if (!lrp) {
> +            continue;
> +        }
> +        for (size_t i = 0; i < op->nbs->n_addresses; i++) {
> +            char *match;
> +            char *actions;
> +            struct eth_addr mac;
> +            ovs_be32 ip;
> +            if (ovs_scan(op->nbs->addresses[i],
> +                     ETH_ADDR_SCAN_FMT" "IP_SCAN_FMT,
> +                     ETH_ADDR_SCAN_ARGS(mac), IP_SCAN_ARGS(&ip))) {
> +                match = xasprintf("ip4.dst == "IP_FMT"", IP_ARGS(ip));
> +                actions = xasprintf("ip4.dst = "IP_FMT"; inport =
> \"\"; next;",
> +                                     IP_ARGS(op->fixed_ip));
> +                ovn_lflow_add(lflows, lrp->peer->od,
S_ROUTER_IN_IP_DNAT,
> +                                90, match, actions);
> +                free(match);
> +                free(actions);
> +
> +                match = xasprintf("(ip4.src == "IP_FMT") && outport ==
%s",
> +                        IP_ARGS(op->fixed_ip), lrp->peer->json_key);
> +                actions = xasprintf("eth.src = "ETH_ADDR_FMT";"
> +                                    " ip4.src = "IP_FMT"; next;",
> +                                    ETH_ADDR_ARGS(mac), IP_ARGS(ip));
> +                ovn_lflow_add(lflows, lrp->peer->od,
> +                              S_ROUTER_IN_IP_SNAT, 90, match, actions);
> +                free(match);
> +                free(actions);
> +
> +                match = xasprintf("ip4.src == "IP_FMT"",
> +                                   IP_ARGS(op->fixed_ip));
> +                actions = xasprintf("ip4.src = "IP_FMT"; next;",IP_ARGS
(ip));
> +                ovn_lflow_add(lflows, lrp->peer->od,
> +                              S_ROUTER_IN_IP_SNAT, 50, match, actions);
> +                free(match);
> +                free(actions);
> +            }
> +        }
> +    }
> +
> +    HMAP_FOR_EACH(od, key_node, datapaths) {
> +        if (!od->nbr) {
> +            continue;
> +        }
> +
> +        /* Default rules for DNAT & SNAT tables with priority 0. */
> +        if (od->gateway_port) {
> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_DNAT, 0, "1",
"next;");
> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_SNAT, 0, "1",
"next;");
> +        }
> +
> +        /* The following rules in DNAT & SNAT tables will prevent
> NAT when the
> +         * src & dst ips belong to private networks that are connected
via a
> +         * router */
> +        bool add_snat_flow = false;
> +        for (size_t j = 0; j < od->nbr->n_ports;j++) {
> +            if (od->gateway_port && !strcmp(od->nbr->ports[j]->name,
> +                                            od->gateway_port->key)) {
> +                continue;
> +            }
> +            ovs_be32 ip1, ip2, mask1, mask2;
> +            char *error =
> ip_parse_masked(od->nbr->ports[j]->network, &ip1, &mask1);
> +            if (error || mask1 == OVS_BE32_MAX || !ip_is_cidr(mask1)) {
> +                free(error);
> +                continue;
> +            }
> +            for (size_t l = 0; l < od->nbr->n_ports;l++) {
> +                if ((l == j) || (od->gateway_port &&
> +                                    !strcmp(od->nbr->ports[l]->name,
> +                                            od->gateway_port->key))) {
> +                    continue;
> +                }
> +                char *error =
> ip_parse_masked(od->nbr->ports[l]->network, &ip2, &mask2);
> +                if (error || mask2 == OVS_BE32_MAX || !ip_is_cidr
(mask2)) {
> +                    free(error);
> +                    continue;
> +                }
> +                char *match = xasprintf("(ip4.src == "IP_FMT"/"IP_FMT")
&& "
> +                                   "(ip4.dst == "IP_FMT"/"IP_FMT")",
> +                                   IP_ARGS(ip1 & mask1), IP_ARGS(mask1),
> +                                   IP_ARGS(ip2 & mask2), IP_ARGS
(mask2));
> +                ovn_lflow_add(lflows, od,
> +                              S_ROUTER_IN_IP_DNAT, 100, match,
> +                              "reg2 = 1; next;");
> +                free(match);
> +                add_snat_flow = true;
> +            }
> +        }
> +        if (add_snat_flow) {
> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_SNAT, 100,
> +                          "reg2 == 1", "next;");
> +        }
> +    }
> +
>      /* Logical router egress table 0: Delivery (priority 100).
>       *
>       * Priority 100 rules deliver packets to enabled logical ports. */
> @@ -2111,8 +2415,12 @@ build_lflows(struct northd_context *ctx,
> struct hmap *datapaths,
>          sbrec_logical_flow_set_match(sbflow, lflow->match);
>          sbrec_logical_flow_set_actions(sbflow, lflow->actions);
>
> -        const struct smap ids = SMAP_CONST1(&ids, "stage-name",
> -                                            ovn_stage_to_str(lflow->
stage));
> +        struct smap ids;
> +        smap_init(&ids);
> +        if (lflow->lport) {
> +            smap_add(&ids, "lport", lflow->lport);
> +        }
> +        smap_add(&ids, "stage-name", ovn_stage_to_str(lflow->stage));
>          sbrec_logical_flow_set_external_ids(sbflow, &ids);
>
>          ovn_lflow_destroy(&lflows, lflow);
> --
> 2.6.1
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev
