The branch main has been updated by meta:

URL: https://cgit.FreeBSD.org/src/commit/?id=93c2d7d5265c56c8734e70c19f838bd56190954a

commit 93c2d7d5265c56c8734e70c19f838bd56190954a
Author:     Koichiro Iwao <m...@freebsd.org>
AuthorDate: 2025-07-11 12:58:10 +0000
Commit:     Koichiro Iwao <m...@freebsd.org>
CommitDate: 2025-07-21 13:47:28 +0000

    if_gif(4): Support the NOCLAMP flag to change MTU handling for IPv6
    
    The patch was originally written by hrs [1], and later modified by meta
    to use named flags instead of generic link-layer flags.
    
    [1] https://reviews.freebsd.org/D45854
    
    PR:             280736
    Co-authored-by: Hiroki Sato <h...@freebsd.org>
    Reviewed by:    ae, ziaee, zlei, pauamma
    Reported by:    Kazuki Shimizu <kaz...@jtime.net>
    Approved by:    pauamma (manpages)
    Approved by:    ae
    MFC after:      2 weeks
    Sponsored by:   Cybertrust Japan
    Differential Revision:  https://reviews.freebsd.org/D51297
---
 sbin/ifconfig/ifconfig.8 |  16 ++++-
 sbin/ifconfig/ifgif.c    |   3 +
 share/man/man4/gif.4     | 154 +++++++++++++++++++++++++++++++++++++++++++----
 sys/net/if_gif.h         |   3 +-
 sys/netinet6/in6_gif.c   |  18 ++++--
 5 files changed, 176 insertions(+), 18 deletions(-)

diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8
index 6c61af48abec..b6e7d3ff2c63 100644
--- a/sbin/ifconfig/ifconfig.8
+++ b/sbin/ifconfig/ifconfig.8
@@ -28,7 +28,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd July 11, 2025
+.Dd July 14, 2025
 .Dt IFCONFIG 8
 .Os
 .Sh NAME
@@ -2878,13 +2878,25 @@ interfaces previously configured with
 Another name for the
 .Fl tunnel
 parameter.
+.It Cm noclamp
+This flag prevents the MTU from being clamped to 1280 bytes, the
+minimum MTU for IPv6, when the outer protocol is IPv6.  When the
+flag is set, the MTU value configured on the interface will be
+used instead of the fixed length of 1280 bytes. For more details,
+please refer to the
+.Ar MTU Configuration and Path MTU Discovery
+section in
+.Xr gif 4 .
+.It Cm -noclamp
+Clear the flag
+.Cm noclamp .
 .It Cm ignore_source
 Set a flag to accept encapsulated packets destined to this host
 independently from source address.
 This may be useful for hosts, that receive encapsulated packets
 from the load balancers.
 .It Cm -ignore_source
-Clear a flag
+Clear the flag
 .Cm ignore_source .
 .El
 .Ss GRE Tunnel Parameters
diff --git a/sbin/ifconfig/ifgif.c b/sbin/ifconfig/ifgif.c
index 991cf110678f..9b8be210a59e 100644
--- a/sbin/ifconfig/ifgif.c
+++ b/sbin/ifconfig/ifgif.c
@@ -49,6 +49,7 @@
 #include "ifconfig.h"
 
 static const char *GIFBITS[] = {
+       [0] = "NOCLAMP",
        [1] = "IGNORE_SOURCE",
 };
 
@@ -90,6 +91,8 @@ setgifopts(if_ctx *ctx, const char *val __unused, int d)
 }
 
 static struct cmd gif_cmds[] = {
+       DEF_CMD("noclamp",              GIF_NOCLAMP,            setgifopts),
+       DEF_CMD("-noclamp",             -GIF_NOCLAMP,           setgifopts),
        DEF_CMD("ignore_source",        GIF_IGNORE_SOURCE,      setgifopts),
        DEF_CMD("-ignore_source",       -GIF_IGNORE_SOURCE,     setgifopts),
 };
diff --git a/share/man/man4/gif.4 b/share/man/man4/gif.4
index 959510451011..ad33d5d21e81 100644
--- a/share/man/man4/gif.4
+++ b/share/man/man4/gif.4
@@ -1,6 +1,7 @@
 .\"    $KAME: gif.4,v 1.28 2001/05/18 13:15:56 itojun Exp $
 .\"
 .\" Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+.\" Copyright (C) 2024 Hiroki Sato <h...@freebsd.org>
 .\" All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
@@ -27,7 +28,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd October 21, 2018
+.Dd July 14, 2025
 .Dt GIF 4
 .Os
 .Sh NAME
@@ -67,8 +68,8 @@ variable in
 .Pp
 To use
 .Nm ,
-the administrator needs to configure the protocol and addresses used for the outer
-header.
+the administrator needs to configure the protocol and addresses used for
+the outer header.
 This can be done by using
 .Xr ifconfig 8
 .Cm tunnel ,
@@ -79,8 +80,7 @@ The administrator also needs to configure the protocol and addresses for the
 inner header, with
 .Xr ifconfig 8 .
 Note that IPv6 link-local addresses
-(those that start with
-.Li fe80:: )
+.Pq those that start with Li fe80\&:\&:
 will be automatically configured whenever possible.
 You may need to remove IPv6 link-local addresses manually using
 .Xr ifconfig 8 ,
@@ -89,12 +89,139 @@ if you want to disable the use of IPv6 as the inner header
 Finally, you must modify the routing table to route the packets through the
 .Nm
 interface.
+.Ss MTU Configuration and Path MTU Discovery
+The
+.Nm
+interface uses the fixed length,
+.Li 1280 ,
+to determine whether the outgoing IPv6 packets are split.
+This means the MTU value configured on the interface will be ignored
+when the outer protocol is IPv6.
+When the
+.Dv NOCLAMP
+interface flag is set,
+.Nm
+uses the same configured value as IPv4 communications.
+This behavior prevents potential issues when the path MTU is
+smaller than the interface MTU.
+This section describes the reason why the default behavior is different.
+The
+.Dv NOCLAMP
+interface flag can be set using the following command:
+.Pp
+.Dl ifconfig Ar gif0 Cm noclamp
+.Pp
+and cleared using the following:
+.Pp
+.Dl ifconfig Ar gif0 Cm -noclamp
+.Pp
+where
+.Ar gif0
+is the actual interface name.
+.Pp
+A tunnel interface always has an implicit smaller MTU for the inner protocol
+than the outer protocol because of the additional header.
+Note that the interface MTU on a
+.Nm
+interface,
+whose default value is
+.Li 1280 ,
+is used as MTU for the outer protocol.
+This means that the MTU for the inner protocol varies depending on the
+outer protocol header length.
+If an outgoing packet bigger than the inner protocol MTU arrives at a
+.Nm
+interface for encapsulation,
+it will be split into fragments.
+Specifically,
+if IPv4 is used as the outer protocol,
+the inner is 20 octets smaller than the interface MTU.
+In the case of the default interface MTU,
+.Li 1280 ,
+inner packets bigger than
+.Li 1260
+will be fragmented.
+In the case of IPv6,
+the inner is 40 octets smaller than the outer.
+.Pp
+This fragmentation is not harmful though it can degrade the
+performance.
+Note that while an increased MTU on
+.Nm
+interface helps to mitigate this reduced performance issue,
+it can also cause packet losses on the intermediate narrowest path
+between the two communication endpoints in IPv6.
+IPv6 allows fragmentation only on the sender,
+not on the routers in the communication path.
+A big outgoing packet will be dropped on a router with a smaller MTU.
 .Pp
+In normal IPv6 communication,
+an ICMPv6 Packet Too Big error will be sent back to the sender,
+who can adjust the packet length and re-send it.
+This process is performed in protocols above L3,
+such as TCP,
+and makes the packet length shorter so that packets go through
+the path without fragmentation.
+This behavior is known as path MTU discovery.
+.Pp
+When using a
+.Nm
+interface,
+the Packet Too Big message is generated for the outer protocol.
+Since the
+.Nm
+interface does not translate this error to the inner protocol,
+the inner protocol sees it just as a packet loss with no useful
+information to adjust the length of the next packets.
+In this situation,
+path MTU discovery does not work,
+and communications of the inner protocol
+become stalled.
+.Pp
+In order to avoid this,
+a
+.Nm
+interface silently splits a packet of over 1240 octets into fragments to make
+the outer protocol packets equal to or shorter than 1280 octets,
+even when the interface MTU is configured as larger than 1280.
+Note that this occurs only when the outer protocol is IPv6.
+.Li 1280
+is the smallest MTU in IPv6 and guarantees no packet loss occurs
+on intermediate routers.
+.Pp
+As mentioned earlier,
+the performance is sub-optimal if the actual path MTU is larger than
+.Li 1280 .
+A typical confusing scenario is as follows.
 The
 .Nm
-device can be configured to be ECN friendly.
-This can be configured by
-.Dv IFF_LINK1 .
+interface can have Ethernet,
+whose MTU is usually 1500,
+as the inner protocol.
+It is called an EtherIP tunnel,
+and can be configured by adding the
+.Nm
+interface as a member of
+.Xr if_bridge 4
+interface.
+The
+.Xr if_bridge 4
+interface forcibly changes the MTU of the
+.Nm
+interface to match those of the other member interfaces,
+which are likely 1500.
+In this case,
+a situation arises in which the MTU of the
+.Nm
+interface is 1500 but fragmentation in 1280 octets always occurs.
+.Pp
+The default behavior is most conservative to prevent confusing packet loss.
+Depending on the network configuration,
+enabling the
+.Dv NOCLAMP
+interface flag might be helpful for better performance.
+It is crucial to ensure that the path MTU is equal to or larger than
+the interface MTU when enabling this flag.
 .Ss ECN friendly behavior
 The
 .Nm
@@ -169,6 +296,7 @@ variable
 to the desired level of nesting.
 .Sh SEE ALSO
 .Xr gre 4 ,
+.Xr if_bridge 4 ,
 .Xr inet 4 ,
 .Xr inet6 4 ,
 .Xr ifconfig 8
@@ -199,7 +327,8 @@ There are many tunnelling protocol specifications, all
 defined differently from each other.
 The
 .Nm
-device may not interoperate with peers which are based on different specifications,
+device may not interoperate with peers which are based on different
+specifications,
 and are picky about outer header fields.
 For example, you cannot usually use
 .Nm
@@ -219,11 +348,14 @@ to 1240 or smaller, when the outer header is IPv6 and the inner header is IPv4.
 .Pp
 The
 .Nm
-device does not translate ICMP messages for the outer header into the inner header.
+device does not translate ICMP messages for the outer header into the inner
+header.
 .Pp
 In the past,
 .Nm
 had a multi-destination behavior, configurable via
-.Dv IFF_LINK0
+.Dv NOCLAMP
 flag.
 The behavior is obsolete and is no longer supported.
+This flag is now used to determine whether to perform fragmentation when
+the outer protocol is IPv6.
diff --git a/sys/net/if_gif.h b/sys/net/if_gif.h
index 3c1846b8f82a..c6692d3dd6bc 100644
--- a/sys/net/if_gif.h
+++ b/sys/net/if_gif.h
@@ -120,7 +120,8 @@ int in6_gif_setopts(struct gif_softc *, u_int);
 #define GIFGOPTS       _IOWR('i', 150, struct ifreq)
 #define GIFSOPTS       _IOW('i', 151, struct ifreq)
 
+#define        GIF_NOCLAMP             0x0001
 #define        GIF_IGNORE_SOURCE       0x0002
-#define        GIF_OPTMASK             (GIF_IGNORE_SOURCE)
+#define        GIF_OPTMASK             (GIF_NOCLAMP|GIF_IGNORE_SOURCE)
 
 #endif /* _NET_IF_GIF_H_ */
diff --git a/sys/netinet6/in6_gif.c b/sys/netinet6/in6_gif.c
index d476829e8e3b..2bab1c57ce2a 100644
--- a/sys/netinet6/in6_gif.c
+++ b/sys/netinet6/in6_gif.c
@@ -194,6 +194,11 @@ in6_gif_setopts(struct gif_softc *sc, u_int options)
                sc->gif_options = options;
                in6_gif_attach(sc);
        }
+
+       if ((options & GIF_NOCLAMP) !=
+           (sc->gif_options & GIF_NOCLAMP)) {
+               sc->gif_options = options;
+       }
        return (0);
 }
 
@@ -289,6 +294,7 @@ in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
 {
        struct gif_softc *sc = ifp->if_softc;
        struct ip6_hdr *ip6;
+       u_long mtu;
 
        /* prepend new IP header */
        NET_EPOCH_ASSERT();
@@ -304,11 +310,15 @@ in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
        ip6->ip6_nxt    = proto;
        ip6->ip6_hlim   = V_ip6_gif_hlim;
        /*
-        * force fragmentation to minimum MTU, to avoid path MTU discovery.
-        * it is too painful to ask for resend of inner packet, to achieve
-        * path MTU discovery for encapsulated packets.
+        * Enforce fragmentation to minimum MTU, even if the interface MTU
+        * is larger, to avoid path MTU discovery when NOCLAMP is not
+        * set (default).  IPv6 does not allow fragmentation on intermediate
+        * router nodes, so it is too painful to ask for resend of inner
+        * packet, to achieve path MTU discovery for encapsulated packets.
         */
-       return (ip6_output(m, 0, NULL, IPV6_MINMTU, 0, NULL, NULL));
+       mtu = ((sc->gif_options & GIF_NOCLAMP) == 0) ? IPV6_MINMTU : 0;
+
+       return (ip6_output(m, 0, NULL, mtu, 0, NULL, NULL));
 }
 
 static int

Reply via email to