Multipath hash policy value of 0 isn't distributing since the outer IP
dest and src aren't varied eventhough the inner ones are. Since the flow
is on the inner ones in the case of tunneled traffic, hashing on them is
desired.

This is done mainly for IP over GRE, hence only tested for that. But
anything else supported by flow dissection should work.

v2: Use skb_flow_dissect_flow_keys() directly so that other tunneling
    can be supported through flow dissection (per Nikolay Aleksandrov).
Signed-off-by: Stephen Suryaputra <ssuryae...@gmail.com>
---
 Documentation/networking/ip-sysctl.txt |  1 +
 net/ipv4/route.c                       | 20 ++++++++++++++++++++
 net/ipv4/sysctl_net_ipv4.c             |  2 +-
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/ip-sysctl.txt 
b/Documentation/networking/ip-sysctl.txt
index 5eedc6941ce5..2f3bc910895a 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -80,6 +80,7 @@ fib_multipath_hash_policy - INTEGER
        Possible values:
        0 - Layer 3
        1 - Layer 4
+       2 - Inner Layer 3 for tunneled IP packets.
 
 fib_sync_mem - UNSIGNED INTEGER
        Amount of dirty memory from fib entries that can be backlogged before
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 14c7fdacaa72..c3e03bce0a3a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1872,6 +1872,26 @@ int fib_multipath_hash(const struct net *net, const 
struct flowi4 *fl4,
                        hash_keys.basic.ip_proto = fl4->flowi4_proto;
                }
                break;
+       case 2:
+               memset(&hash_keys, 0, sizeof(hash_keys));
+               hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+               /* skb is currently provided only when forwarding */
+               if (skb) {
+                       struct flow_keys keys;
+
+                       skb_flow_dissect_flow_keys(skb, &keys, 0);
+
+                       hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+                       hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+                       hash_keys.ports.src = keys.ports.src;
+                       hash_keys.ports.dst = keys.ports.dst;
+                       hash_keys.basic.ip_proto = keys.basic.ip_proto;
+               } else {
+                       /* Same as case 0 */
+                       hash_keys.addrs.v4addrs.src = fl4->saddr;
+                       hash_keys.addrs.v4addrs.dst = fl4->daddr;
+               }
+               break;
        }
        mhash = flow_hash_from_keys(&hash_keys);
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 2316c08e9591..e1efc2e62d21 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -960,7 +960,7 @@ static struct ctl_table ipv4_net_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_fib_multipath_hash_policy,
                .extra1         = &zero,
-               .extra2         = &one,
+               .extra2         = &two,
        },
 #endif
        {
-- 
2.17.1

Reply via email to