The branch main has been updated by kp:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=c2346c3d3ab754299d1e75705997db6fb9a49420

commit c2346c3d3ab754299d1e75705997db6fb9a49420
Author:     Kristof Provost <k...@freebsd.org>
AuthorDate: 2025-02-13 13:14:56 +0000
Commit:     Kristof Provost <k...@freebsd.org>
CommitDate: 2025-02-20 08:25:51 +0000

    pf: support source-hash and random with tables and dynifs, not just pools
    
    This finally allows to use source-hash for dynamic loadbalancing, eg.
    "rdr-to <hosts> source-hash", instead of just round-robin and least-states.
    
    An older pre-siphash version of this diff was tested by many people.
    
    OK tedu@ benno@
    
    Obtained from:  OpenBSD, reyk <r...@openbsd.org>, 252a05523f
    Sponsored by:   Rubicon Communications, LLC ("Netgate")
---
 sbin/pfctl/parse.y            | 43 ++++++++++--------------
 share/man/man5/pf.conf.5      | 10 +++---
 sys/netpfil/pf/pf.h           |  5 +++
 sys/netpfil/pf/pf_lb.c        | 74 ++++++++++++++++++++++++++++++++++-------
 tests/sys/netpfil/pf/nat64.sh | 76 +++++++++++++++++++++++++++++++++----------
 5 files changed, 148 insertions(+), 60 deletions(-)

diff --git a/sbin/pfctl/parse.y b/sbin/pfctl/parse.y
index f3334c961909..8f732bdf268a 100644
--- a/sbin/pfctl/parse.y
+++ b/sbin/pfctl/parse.y
@@ -2747,16 +2747,16 @@ pfrule          : action dir logquick interface route 
af proto fromto
                                    $5.host->addr.type == PF_ADDR_TABLE ||
                                    DYNIF_MULTIADDR($5.host->addr)))
                                        r.route.opts |= PF_POOL_ROUNDROBIN;
-                               if ((r.route.opts & PF_POOL_TYPEMASK) !=
-                                   PF_POOL_ROUNDROBIN &&
-                                   disallow_table($5.host, "tables are only "
-                                   "supported in round-robin routing pools"))
-                                       YYERROR;
-                               if ((r.route.opts & PF_POOL_TYPEMASK) !=
-                                   PF_POOL_ROUNDROBIN &&
-                                   disallow_alias($5.host, "interface (%s) "
-                                   "is only supported in round-robin "
-                                   "routing pools"))
+                               if ($5.host->next != NULL &&
+                                   !PF_POOL_DYNTYPE(r.route.opts)) {
+                                       yyerror("address pool option "
+                                           "not supported by type");
+                               }
+                               if (!PF_POOL_DYNTYPE(r.route.opts) &&
+                                   (disallow_table($5.host,
+                                   "tables are not supported by pool type") ||
+                                   disallow_alias($5.host,
+                                   "interface (%s) is not supported by pool 
type")))
                                        YYERROR;
                                if ($5.host->next != NULL) {
                                        if ((r.route.opts & PF_POOL_TYPEMASK) !=
@@ -2829,10 +2829,9 @@ pfrule           : action dir logquick interface route 
af proto fromto
                                r.nat.opts = $9.nat.pool_opts.type;
                                r.nat.opts |= $9.nat.pool_opts.opts;
 
-                               if ((r.nat.opts & PF_POOL_TYPEMASK) !=
-                                   PF_POOL_ROUNDROBIN &&
-                                   disallow_table($9.nat.rdr->host, "tables 
are only "
-                                   "supported in round-robin pools"))
+                               if (!PF_POOL_DYNTYPE(r.nat.opts) &&
+                                   disallow_table($9.nat.rdr->host, "tables 
are not "
+                                   "supported by pool type"))
                                        YYERROR;
                        }
 
@@ -4916,17 +4915,11 @@ natrule         : nataction interface af proto fromto 
tag tagged rtable
                                    $9->host->addr.type == PF_ADDR_TABLE ||
                                    DYNIF_MULTIADDR($9->host->addr)))
                                        r.rdr.opts = PF_POOL_ROUNDROBIN;
-                               if ((r.rdr.opts & PF_POOL_TYPEMASK) !=
-                                   PF_POOL_ROUNDROBIN &&
-                                   disallow_table($9->host, "tables are only "
-                                   "supported in round-robin redirection "
-                                   "pools"))
-                                       YYERROR;
-                               if ((r.rdr.opts & PF_POOL_TYPEMASK) !=
-                                   PF_POOL_ROUNDROBIN &&
-                                   disallow_alias($9->host, "interface (%s) "
-                                   "is only supported in round-robin "
-                                   "redirection pools"))
+                               if (!PF_POOL_DYNTYPE(r.rdr.opts) &&
+                                   (disallow_table($9->host,
+                                   "tables are not supported by pool type") ||
+                                   disallow_alias($9->host,
+                                   "interface (%s) is not supported by pool 
type")))
                                        YYERROR;
                                if ($9->host->next != NULL) {
                                        if ((r.rdr.opts & PF_POOL_TYPEMASK) !=
diff --git a/share/man/man5/pf.conf.5 b/share/man/man5/pf.conf.5
index 75cb0b39272f..1192cd3d02e8 100644
--- a/share/man/man5/pf.conf.5
+++ b/share/man/man5/pf.conf.5
@@ -27,7 +27,7 @@
 .\" ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd February 11, 2025
+.Dd February 13, 2025
 .Dt PF.CONF 5
 .Os
 .Sh NAME
@@ -132,8 +132,8 @@ Tables can also be used for the redirect address of
 .Ar nat
 and
 .Ar rdr
-rules and in the routing options of filter rules, but only for
-.Ar round-robin
+and in the routing options of filter rules, but not for
+.Ar bitmask
 pools.
 .Pp
 Tables can be defined with any of the following
@@ -2335,8 +2335,8 @@ The
 option loops through the redirection address(es).
 .Pp
 When more than one redirection address is specified,
-.Ar round-robin
-is the only permitted pool type.
+.Ar bitmask
+is not permitted as a pool type.
 .It Ar static-port
 With
 .Ar nat
diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h
index dfa86e7f1d6d..b5c0eeaa8f01 100644
--- a/sys/netpfil/pf/pf.h
+++ b/sys/netpfil/pf/pf.h
@@ -133,6 +133,11 @@ enum       { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, 
PF_ADDR_DYNIFTL,
 #define        PF_WSCALE_FLAG          0x80
 #define        PF_WSCALE_MASK          0x0f
 
+#define PF_POOL_DYNTYPE(_o)                                    \
+       ((((_o) & PF_POOL_TYPEMASK) == PF_POOL_ROUNDROBIN) ||   \
+       (((_o) & PF_POOL_TYPEMASK) == PF_POOL_RANDOM) ||        \
+       (((_o) & PF_POOL_TYPEMASK) == PF_POOL_SRCHASH))
+
 #define        PF_LOG                  0x01
 #define        PF_LOG_ALL              0x02
 #define        PF_LOG_SOCKET_LOOKUP    0x04
diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c
index 83dd63fd2290..d1ba2495dc30 100644
--- a/sys/netpfil/pf/pf_lb.c
+++ b/sys/netpfil/pf/pf_lb.c
@@ -73,7 +73,7 @@ VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16;
 
 #define DPFPRINTF(n, x)        if (V_pf_status.debug >= (n)) printf x
 
-static void             pf_hash(struct pf_addr *, struct pf_addr *,
+static uint64_t                 pf_hash(struct pf_addr *, struct pf_addr *,
                            struct pf_poolhashkey *, sa_family_t);
 static struct pf_krule *pf_match_translation(struct pf_pdesc *,
                            int, struct pf_kanchor_stackframe *);
@@ -84,7 +84,7 @@ static int             pf_get_sport(struct pf_pdesc *, struct 
pf_krule *,
                            pf_sn_types_t);
 static bool             pf_islinklocal(const sa_family_t, const struct pf_addr 
*);
 
-static void
+static uint64_t
 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
     struct pf_poolhashkey *key, sa_family_t af)
 {
@@ -95,20 +95,23 @@ pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
                uint32_t hash32[2];
        } h;
 #endif
+       uint64_t         res = 0;
 
        _Static_assert(sizeof(*key) >= SIPHASH_KEY_LENGTH, "");
 
        switch (af) {
 #ifdef INET
        case AF_INET:
-               hash->addr32[0] = SipHash24(&ctx, (const uint8_t *)key,
+               res = SipHash24(&ctx, (const uint8_t *)key,
                    &inaddr->addr32[0], sizeof(inaddr->addr32[0]));
+               hash->addr32[0] = res;
                break;
 #endif /* INET */
 #ifdef INET6
        case AF_INET6:
-               h.hash64 = SipHash24(&ctx, (const uint8_t *)key,
+               res = SipHash24(&ctx, (const uint8_t *)key,
                    &inaddr->addr32[0], 4 * sizeof(inaddr->addr32[0]));
+               h.hash64 = res;
                hash->addr32[0] = h.hash32[0];
                hash->addr32[1] = h.hash32[1];
                /*
@@ -120,6 +123,7 @@ pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
                break;
 #endif /* INET6 */
        }
+       return (res);
 }
 
 static struct pf_krule *
@@ -459,6 +463,8 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct 
pf_addr *saddr,
 {
        u_short                  reason = PFRES_MATCH;
        struct pf_addr          *raddr = NULL, *rmask = NULL;
+       uint64_t                 hashidx;
+       int                      cnt;
 
        mtx_lock(&rpool->mtx);
        /* Find the route using chosen algorithm. Store the found route
@@ -472,8 +478,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct 
pf_addr *saddr,
 #ifdef INET
                case AF_INET:
                        if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
-                           (rpool->opts & PF_POOL_TYPEMASK) !=
-                           PF_POOL_ROUNDROBIN) {
+                           !PF_POOL_DYNTYPE(rpool->opts)) {
                                reason = PFRES_MAPFAILED;
                                goto done_pool_mtx;
                        }
@@ -484,8 +489,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct 
pf_addr *saddr,
 #ifdef INET6
                case AF_INET6:
                        if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
-                           (rpool->opts & PF_POOL_TYPEMASK) !=
-                           PF_POOL_ROUNDROBIN) {
+                           !PF_POOL_DYNTYPE(rpool->opts)) {
                                reason = PFRES_MAPFAILED;
                                goto done_pool_mtx;
                        }
@@ -495,7 +499,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct 
pf_addr *saddr,
 #endif /* INET6 */
                }
        } else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
-               if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) {
+               if (!PF_POOL_DYNTYPE(rpool->opts)) {
                        reason = PFRES_MAPFAILED;
                        goto done_pool_mtx; /* unsupported */
                }
@@ -512,7 +516,28 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct 
pf_addr *saddr,
                PF_POOLMASK(naddr, raddr, rmask, saddr, af);
                break;
        case PF_POOL_RANDOM:
-               if (init_addr != NULL && PF_AZERO(init_addr, af)) {
+               if (rpool->cur->addr.type == PF_ADDR_TABLE) {
+                       cnt = rpool->cur->addr.p.tbl->pfrkt_cnt;
+                       rpool->tblidx = (int)arc4random_uniform(cnt);
+                       memset(&rpool->counter, 0, sizeof(rpool->counter));
+                       if (pfr_pool_get(rpool->cur->addr.p.tbl,
+                           &rpool->tblidx, &rpool->counter, af, NULL)) {
+                               reason = PFRES_MAPFAILED;
+                               goto done_pool_mtx; /* unsupported */
+                       }
+                       PF_ACPY(naddr, &rpool->counter, af);
+               } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
+                       cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt;
+                       rpool->tblidx = (int)arc4random_uniform(cnt);
+                       memset(&rpool->counter, 0, sizeof(rpool->counter));
+                       if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
+                           &rpool->tblidx, &rpool->counter, af,
+                           pf_islinklocal)) {
+                               reason = PFRES_MAPFAILED;
+                               goto done_pool_mtx; /* unsupported */
+                       }
+                       PF_ACPY(naddr, &rpool->counter, af);
+               } else if (init_addr != NULL && PF_AZERO(init_addr, af)) {
                        switch (af) {
 #ifdef INET
                        case AF_INET:
@@ -554,8 +579,33 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct 
pf_addr *saddr,
            {
                unsigned char hash[16];
 
-               pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
-               PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
+               hashidx =
+                   pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
+               if (rpool->cur->addr.type == PF_ADDR_TABLE) {
+                       cnt = rpool->cur->addr.p.tbl->pfrkt_cnt;
+                       rpool->tblidx = (int)(hashidx % cnt);
+                       memset(&rpool->counter, 0, sizeof(rpool->counter));
+                       if (pfr_pool_get(rpool->cur->addr.p.tbl,
+                           &rpool->tblidx, &rpool->counter, af, NULL)) {
+                               reason = PFRES_MAPFAILED;
+                               goto done_pool_mtx; /* unsupported */
+                       }
+                       PF_ACPY(naddr, &rpool->counter, af);
+               } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
+                       cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt;
+                       rpool->tblidx = (int)(hashidx % cnt);
+                       memset(&rpool->counter, 0, sizeof(rpool->counter));
+                       if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
+                           &rpool->tblidx, &rpool->counter, af,
+                           pf_islinklocal)) {
+                               reason = PFRES_MAPFAILED;
+                               goto done_pool_mtx; /* unsupported */
+                       }
+                       PF_ACPY(naddr, &rpool->counter, af);
+               } else {
+                       PF_POOLMASK(naddr, raddr, rmask,
+                           (struct pf_addr *)&hash, af);
+               }
                break;
            }
        case PF_POOL_ROUNDROBIN:
diff --git a/tests/sys/netpfil/pf/nat64.sh b/tests/sys/netpfil/pf/nat64.sh
index 9cc6aececc42..68d9191cacb9 100644
--- a/tests/sys/netpfil/pf/nat64.sh
+++ b/tests/sys/netpfil/pf/nat64.sh
@@ -409,7 +409,7 @@ pool_cleanup()
 atf_test_case "table"
 table_head()
 {
-       atf_set descr 'Tables require round-robin'
+       atf_set descr 'Check table restrictions'
        atf_set require.user root
 }
 
@@ -417,9 +417,18 @@ table_body()
 {
        pft_init
 
-       echo "pass in on epair inet6 from any to 64:ff9b::/96 af-to inet from 
<wanaddr>" | \
+       # Round-robin and random are allowed
+       echo "pass in on epair inet6 from any to 64:ff9b::/96 af-to inet from 
<wanaddr> round-robin" | \
+           atf_check -s exit:0 \
+           pfctl -f -
+       echo "pass in on epair inet6 from any to 64:ff9b::/96 af-to inet from 
<wanaddr> random" | \
+           atf_check -s exit:0 \
+           pfctl -f -
+
+       # bitmask is not
+       echo "pass in on epair inet6 from any to 64:ff9b::/96 af-to inet from 
<wanaddr> bitmask" | \
            atf_check -s exit:1 \
-           -e match:"tables are only supported in round-robin pools" \
+           -e match:"tables are not supported by pool type" \
            pfctl -f -
 }
 
@@ -489,15 +498,11 @@ table_range_cleanup()
        pft_cleanup
 }
 
-atf_test_case "table_round_robin" "cleanup"
-table_round_robin_head()
+table_common_body()
 {
-       atf_set descr 'Use a table of IPv4 addresses in round-robin mode'
-       atf_set require.user root
-}
+       pool_type=$1
+echo pool_type=${pool_type}
 
-table_round_robin_body()
-{
        pft_init
 
        epair_link=$(vnet_mkepair)
@@ -527,7 +532,7 @@ table_round_robin_body()
            "set reassemble yes" \
            "set state-policy if-bound" \
            "table <wanaddrs> { 192.0.2.1, 192.0.2.3, 192.0.2.4 }" \
-           "pass in on ${epair}b inet6 from any to 64:ff9b::/96 af-to inet 
from <wanaddrs> round-robin"
+           "pass in on ${epair}b inet6 from any to 64:ff9b::/96 af-to inet 
from <wanaddrs> ${pool_type}"
 
        # Use pf to count sources
        jexec dst pfctl -e
@@ -541,13 +546,30 @@ table_round_robin_body()
        atf_check -s exit:0 -o ignore \
            ping6 -c 1 64:ff9b::192.0.2.2
 
-       # Verify on dst that we saw different source addresses
-       atf_check -s exit:0 -o match:".*192.0.2.1.*" \
-           jexec dst pfctl -ss
-       atf_check -s exit:0 -o match:".*192.0.2.3.*" \
-           jexec dst pfctl -ss
-       atf_check -s exit:0 -o match:".*192.0.2.4.*" \
-           jexec dst pfctl -ss
+       # XXX We can't reasonably check pool type random because it's random. 
It may end
+       # up choosing the same source IP for all three connections.
+       if [ "${pool_type}" == "round-robin" ];
+       then
+               # Verify on dst that we saw different source addresses
+               atf_check -s exit:0 -o match:".*192.0.2.1.*" \
+                   jexec dst pfctl -ss
+               atf_check -s exit:0 -o match:".*192.0.2.3.*" \
+                   jexec dst pfctl -ss
+               atf_check -s exit:0 -o match:".*192.0.2.4.*" \
+                   jexec dst pfctl -ss
+       fi
+}
+
+atf_test_case "table_round_robin" "cleanup"
+table_round_robin_head()
+{
+       atf_set descr 'Use a table of IPv4 addresses in round-robin mode'
+       atf_set require.user root
+}
+
+table_round_robin_body()
+{
+       table_common_body round-robin
 }
 
 table_round_robin_cleanup()
@@ -555,6 +577,23 @@ table_round_robin_cleanup()
        pft_cleanup
 }
 
+atf_test_case "table_random" "cleanup"
+table_random_head()
+{
+       atf_set descr 'Use a table of IPv4 addresses in random mode'
+       atf_set require.user root
+}
+
+table_random_body()
+{
+       table_common_body random
+}
+
+table_random_cleanup()
+{
+       pft_cleanup
+}
+
 atf_test_case "dummynet" "cleanup"
 dummynet_head()
 {
@@ -775,6 +814,7 @@ atf_init_test_cases()
        atf_add_test_case "table"
        atf_add_test_case "table_range"
        atf_add_test_case "table_round_robin"
+       atf_add_test_case "table_random"
        atf_add_test_case "dummynet"
        atf_add_test_case "gateway6"
        atf_add_test_case "route_to"

Reply via email to