Ruslan,

  okay, I have implemented a feature that allows to merge your ruleset
into one rule. Look how it works:

[EMAIL PROTECTED]:~:|>kldload dummynet
[EMAIL PROTECTED]:~:|>ipfw pipe 2 config
[EMAIL PROTECTED]:~:|>ipfw pipe 3 config
[EMAIL PROTECTED]:~:|>ipfw add 1000 pipe 65535 ip from any to table\(1\) 
01000 pipe 65535 ip from any to table(1)
[EMAIL PROTECTED]:~:|>ipfw table 1 add 217.72.144.35 2
[EMAIL PROTECTED]:~:|>ipfw table 1 add 217.72.144.68 3
[EMAIL PROTECTED]:~:|>ipfw table 1 list
217.72.144.35/32 2
217.72.144.68/32 3
[EMAIL PROTECTED]:~:|>ping cell.sick.ru
PING cell.sick.ru (217.72.144.68): 56 data bytes
64 bytes from 217.72.144.68: icmp_seq=0 ttl=60 time=2.653 ms
^C
--- cell.sick.ru ping statistics ---
1 packets transmitted, 1 packets received, 0% packet loss
round-trip min/avg/max/stddev = 2.653/2.653/2.653/0.000 ms
[EMAIL PROTECTED]:~:|>ping unixfaq.ru
PING unixfaq.ru (217.72.144.35): 56 data bytes
64 bytes from 217.72.144.35: icmp_seq=0 ttl=60 time=2.563 ms
^C
--- unixfaq.ru ping statistics ---
1 packets transmitted, 1 packets received, 0% packet loss
round-trip min/avg/max/stddev = 2.563/2.563/2.563/0.000 ms
[EMAIL PROTECTED]:~:|>ipfw pipe 2 show
00002: unlimited    0 ms   50 sl. 1 queues (1 buckets) droptail
    mask: 0x00 0x00000000/0x0000 -> 0x00000000/0x0000
BKT Prot ___Source IP/port____ ____Dest. IP/port____ Tot_pkt/bytes Pkt/Byte Drp
  0 icmp    81.19.64.118/0       217.72.144.35/0        1       84  0    0   0
[EMAIL PROTECTED]:~:|>ipfw pipe 3 show
00003: unlimited    0 ms   50 sl. 1 queues (1 buckets) droptail
    mask: 0x00 0x00000000/0x0000 -> 0x00000000/0x0000
BKT Prot ___Source IP/port____ ____Dest. IP/port____ Tot_pkt/bytes Pkt/Byte Drp
  0 icmp    81.19.64.118/0       217.72.144.68/0        1       84  0    0   0

The number 65535 is some magic number, which means "take argument from table". I
will make ipfw display some word instead of 65535, for example "tablearg". So, 
the
rule will be looking like:

        pipe tablearg ip from any to table(1)

Implementing this feature I have encountered a problem, that I have encountered
before - the locate_flowset() function in ip_dummynet.c. The problem is that
dummynet and ipfw are so welded together. The rule itself points into a pipe,
and this puts a design limit that a packet from a rule can go only into one
pipe. So, I removed this limitation removing the pointer from rule to pipe. This
change also garbage-collected several XXX in the code. Yes, I've made a 
regression
here for the time for pipe/queue lookup. But this is a proof-of-concept patch.
Before checking in I will make a hash for pipes/queues in dummynet. This change
also is a step forward in divorcing dummynet and ipfw, and thus a step closer
to ng_dummynet node :).

The proof-of-concept patch attached.

Before committing it will be divided into three parts:

1) unlock the tables and remove caching
2) remove the rule->pipe pointer and implement hash for pipes/queues
3) introduce the tablearg feature

-- 
Totus tuus, Glebius.
GLEBIUS-RIPN GLEB-RIPE
Index: ip_dummynet.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/ip_dummynet.c,v
retrieving revision 1.95
diff -u -r1.95 ip_dummynet.c
--- ip_dummynet.c       27 Sep 2005 18:10:42 -0000      1.95
+++ ip_dummynet.c       28 Nov 2005 15:30:31 -0000
@@ -1111,19 +1111,11 @@
 struct dn_flow_set *
 locate_flowset(int pipe_nr, struct ip_fw *rule)
 {
-    struct dn_flow_set *fs;
+    struct dn_flow_set *fs = NULL;
     ipfw_insn *cmd = ACTION_PTR(rule);
 
     if (cmd->opcode == O_LOG)
        cmd += F_LEN(cmd);
-#ifdef __i386__
-    fs = ((ipfw_insn_pipe *)cmd)->pipe_ptr;
-#else
-    bcopy(& ((ipfw_insn_pipe *)cmd)->pipe_ptr, &fs, sizeof(fs));
-#endif
-
-    if (fs != NULL)
-       return fs;
 
     if (cmd->opcode == O_QUEUE)
        for (fs=all_flow_sets; fs && fs->fs_nr != pipe_nr; fs=fs->next)
@@ -1135,12 +1127,6 @@
        if (p1 != NULL)
            fs = &(p1->fs) ;
     }
-    /* record for the future */
-#ifdef __i386__
-    ((ipfw_insn_pipe *)cmd)->pipe_ptr = fs;
-#else
-    bcopy(&fs, & ((ipfw_insn_pipe *)cmd)->pipe_ptr, sizeof(fs));
-#endif
     return fs ;
 }
 
@@ -1407,8 +1393,6 @@
     struct dn_flow_set *fs, *curr_fs;
 
     DUMMYNET_LOCK();
-    /* remove all references to pipes ...*/
-    flush_pipe_ptrs(NULL);
     /* prevent future matches... */
     p = all_pipes ;
     all_pipes = NULL ;
@@ -1811,8 +1795,6 @@
            all_pipes = b->next ;
        else
            a->next = b->next ;
-       /* remove references to this pipe from the ip_fw rules. */
-       flush_pipe_ptrs(&(b->fs));
 
        /* remove all references to this pipe from flow_sets */
        for (fs = all_flow_sets; fs; fs= fs->next )
@@ -1846,8 +1828,6 @@
            all_flow_sets = b->next ;
        else
            a->next = b->next ;
-       /* remove references to this flow_set from the ip_fw rules. */
-       flush_pipe_ptrs(b);
 
        if (b->pipe != NULL) {
            /* Update total weight on parent pipe and cleanup parent heaps */
Index: ip_fw.h
===================================================================
RCS file: /home/ncvs/src/sys/netinet/ip_fw.h,v
retrieving revision 1.101
diff -u -r1.101 ip_fw.h
--- ip_fw.h     13 Aug 2005 11:02:33 -0000      1.101
+++ ip_fw.h     28 Nov 2005 15:37:05 -0000
@@ -271,19 +271,6 @@
 } ipfw_insn_if;
 
 /*
- * This is used for pipe and queue actions, which need to store
- * a single pointer (which can have different size on different
- * architectures.
- * Note that, because of previous instructions, pipe_ptr might
- * be unaligned in the overall structure, so it needs to be
- * manipulated with care.
- */
-typedef struct _ipfw_insn_pipe {
-       ipfw_insn       o;
-       void            *pipe_ptr;      /* XXX */
-} ipfw_insn_pipe;
-
-/*
  * This is used for storing an altq queue id number.
  */
 typedef struct _ipfw_insn_altq {
@@ -474,6 +461,8 @@
        ipfw_table_entry ent[0];        /* entries                      */
 } ipfw_table;
 
+#define        IP_FW_TABLECOOKIE       USHRT_MAX
+
 /*
  * Main firewall chains definitions and global var's definitions.
  */
@@ -546,8 +535,6 @@
 int ipfw_init(void);
 void ipfw_destroy(void);
 
-void flush_pipe_ptrs(struct dn_flow_set *match); /* used by dummynet */
-
 typedef int ip_fw_ctl_t(struct sockopt *);
 extern ip_fw_ctl_t *ip_fw_ctl_ptr;
 extern int fw_one_pass;
Index: ip_fw2.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/ip_fw2.c,v
retrieving revision 1.115
diff -u -r1.115 ip_fw2.c
--- ip_fw2.c    10 Nov 2005 22:10:39 -0000      1.115
+++ ip_fw2.c    28 Nov 2005 15:30:05 -0000
@@ -51,6 +51,7 @@
 #include <sys/mbuf.h>
 #include <sys/kernel.h>
 #include <sys/jail.h>
+#include <sys/limits.h>
 #include <sys/module.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
@@ -126,9 +127,11 @@
        int             fw_prid;
 };
 
+#define        IPFW_TABLES_MAX         128
 struct ip_fw_chain {
        struct ip_fw    *rules;         /* list of rules */
        struct ip_fw    *reap;          /* list of rules to reap */
+       struct radix_node_head *tables[IPFW_TABLES_MAX];
        struct mtx      mtx;            /* lock guarding rule list */
        int             busy_count;     /* busy count for rw locks */
        int             want_write;
@@ -192,15 +195,6 @@
        u_int32_t               value;
 };
 
-#define        IPFW_TABLES_MAX         128
-static struct ip_fw_table {
-       struct radix_node_head  *rnh;
-       int                     modified;
-       in_addr_t               last_addr;
-       int                     last_match;
-       u_int32_t               last_value;
-} ipfw_tables[IPFW_TABLES_MAX];
-
 static int fw_debug = 1;
 static int autoinc_step = 100; /* bounded to 1..1000 in add_rule() */
 
@@ -1703,25 +1697,24 @@
 }
 
 static void
-init_tables(void)
+init_tables(struct ip_fw_chain *ch)
 {
        int i;
 
-       for (i = 0; i < IPFW_TABLES_MAX; i++) {
-               rn_inithead((void **)&ipfw_tables[i].rnh, 32);
-               ipfw_tables[i].modified = 1;
-       }
+       for (i = 0; i < IPFW_TABLES_MAX; i++)
+               rn_inithead((void **)&ch->tables[i], 32);
 }
 
 static int
-add_table_entry(u_int16_t tbl, in_addr_t addr, u_int8_t mlen, u_int32_t value)
+add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
+       uint8_t mlen, uint32_t value)
 {
        struct radix_node_head *rnh;
        struct table_entry *ent;
 
        if (tbl >= IPFW_TABLES_MAX)
                return (EINVAL);
-       rnh = ipfw_tables[tbl].rnh;
+       rnh = ch->tables[tbl];
        ent = malloc(sizeof(*ent), M_IPFW_TBL, M_NOWAIT | M_ZERO);
        if (ent == NULL)
                return (ENOMEM);
@@ -1729,20 +1722,20 @@
        ent->addr.sin_len = ent->mask.sin_len = 8;
        ent->mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
        ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr;
-       RADIX_NODE_HEAD_LOCK(rnh);
+       IPFW_WLOCK(&layer3_chain);
        if (rnh->rnh_addaddr(&ent->addr, &ent->mask, rnh, (void *)ent) ==
            NULL) {
-               RADIX_NODE_HEAD_UNLOCK(rnh);
+               IPFW_WUNLOCK(&layer3_chain);
                free(ent, M_IPFW_TBL);
                return (EEXIST);
        }
-       ipfw_tables[tbl].modified = 1;
-       RADIX_NODE_HEAD_UNLOCK(rnh);
+       IPFW_WUNLOCK(&layer3_chain);
        return (0);
 }
 
 static int
-del_table_entry(u_int16_t tbl, in_addr_t addr, u_int8_t mlen)
+del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
+       uint8_t mlen)
 {
        struct radix_node_head *rnh;
        struct table_entry *ent;
@@ -1750,18 +1743,17 @@
 
        if (tbl >= IPFW_TABLES_MAX)
                return (EINVAL);
-       rnh = ipfw_tables[tbl].rnh;
+       rnh = ch->tables[tbl];
        sa.sin_len = mask.sin_len = 8;
        mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0);
        sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr;
-       RADIX_NODE_HEAD_LOCK(rnh);
+       IPFW_WLOCK(ch);
        ent = (struct table_entry *)rnh->rnh_deladdr(&sa, &mask, rnh);
        if (ent == NULL) {
-               RADIX_NODE_HEAD_UNLOCK(rnh);
+               IPFW_WUNLOCK(ch);
                return (ESRCH);
        }
-       ipfw_tables[tbl].modified = 1;
-       RADIX_NODE_HEAD_UNLOCK(rnh);
+       IPFW_WUNLOCK(ch);
        free(ent, M_IPFW_TBL);
        return (0);
 }
@@ -1780,63 +1772,48 @@
 }
 
 static int
-flush_table(u_int16_t tbl)
+flush_table(struct ip_fw_chain *ch, uint16_t tbl)
 {
        struct radix_node_head *rnh;
 
+       IPFW_WLOCK_ASSERT(ch);
+
        if (tbl >= IPFW_TABLES_MAX)
                return (EINVAL);
-       rnh = ipfw_tables[tbl].rnh;
-       RADIX_NODE_HEAD_LOCK(rnh);
+       rnh = ch->tables[tbl];
        rnh->rnh_walktree(rnh, flush_table_entry, rnh);
-       ipfw_tables[tbl].modified = 1;
-       RADIX_NODE_HEAD_UNLOCK(rnh);
        return (0);
 }
 
 static void
-flush_tables(void)
+flush_tables(struct ip_fw_chain *ch)
 {
-       u_int16_t tbl;
+       uint16_t tbl;
+
+       IPFW_WLOCK_ASSERT(ch);
 
        for (tbl = 0; tbl < IPFW_TABLES_MAX; tbl++)
-               flush_table(tbl);
+               flush_table(ch, tbl);
 }
 
 static int
-lookup_table(u_int16_t tbl, in_addr_t addr, u_int32_t *val)
+lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
+       uint32_t *val)
 {
        struct radix_node_head *rnh;
-       struct ip_fw_table *table;
        struct table_entry *ent;
        struct sockaddr_in sa;
-       int last_match;
 
        if (tbl >= IPFW_TABLES_MAX)
                return (0);
-       table = &ipfw_tables[tbl];
-       rnh = table->rnh;
-       RADIX_NODE_HEAD_LOCK(rnh);
-       if (addr == table->last_addr && !table->modified) {
-               last_match = table->last_match;
-               if (last_match)
-                       *val = table->last_value;
-               RADIX_NODE_HEAD_UNLOCK(rnh);
-               return (last_match);
-       }
-       table->modified = 0;
+       rnh = ch->tables[tbl];
        sa.sin_len = 8;
        sa.sin_addr.s_addr = addr;
        ent = (struct table_entry *)(rnh->rnh_lookup(&sa, NULL, rnh));
-       table->last_addr = addr;
        if (ent != NULL) {
-               table->last_value = *val = ent->value;
-               table->last_match = 1;
-               RADIX_NODE_HEAD_UNLOCK(rnh);
+               *val = ent->value;
                return (1);
        }
-       table->last_match = 0;
-       RADIX_NODE_HEAD_UNLOCK(rnh);
        return (0);
 }
 
@@ -1850,17 +1827,15 @@
 }
 
 static int
-count_table(u_int32_t tbl, u_int32_t *cnt)
+count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt)
 {
        struct radix_node_head *rnh;
 
        if (tbl >= IPFW_TABLES_MAX)
                return (EINVAL);
-       rnh = ipfw_tables[tbl].rnh;
+       rnh = ch->tables[tbl];
        *cnt = 0;
-       RADIX_NODE_HEAD_LOCK(rnh);
        rnh->rnh_walktree(rnh, count_table_entry, cnt);
-       RADIX_NODE_HEAD_UNLOCK(rnh);
        return (0);
 }
 
@@ -1886,17 +1861,17 @@
 }
 
 static int
-dump_table(ipfw_table *tbl)
+dump_table(struct ip_fw_chain *ch, ipfw_table *tbl)
 {
        struct radix_node_head *rnh;
 
+       IPFW_WLOCK_ASSERT(ch);
+
        if (tbl->tbl >= IPFW_TABLES_MAX)
                return (EINVAL);
-       rnh = ipfw_tables[tbl->tbl].rnh;
+       rnh = ch->tables[tbl->tbl];
        tbl->cnt = 0;
-       RADIX_NODE_HEAD_LOCK(rnh);
        rnh->rnh_walktree(rnh, dump_table_entry, tbl);
-       RADIX_NODE_HEAD_UNLOCK(rnh);
        return (0);
 }
 
@@ -2409,9 +2384,9 @@
         * Now scan the rules, and parse microinstructions for each rule.
         */
        for (; f; f = f->next) {
-               int l, cmdlen;
                ipfw_insn *cmd;
-               int skip_or; /* skip rest of OR block */
+               uint32_t tablearg = 0;
+               int l, cmdlen, skip_or; /* skip rest of OR block */
 
 again:
                if (set_disable & (1 << f->set) )
@@ -2567,12 +2542,15 @@
                                            dst_ip.s_addr : src_ip.s_addr;
                                    uint32_t v;
 
-                                   match = lookup_table(cmd->arg1, a, &v);
+                                   match = lookup_table(chain, cmd->arg1, a,
+                                       &v);
                                    if (!match)
                                        break;
                                    if (cmdlen == F_INSN_SIZE(ipfw_insn_u32))
                                        match =
                                            ((ipfw_insn_u32 *)cmd)->d[0] == v;
+                                   else
+                                       tablearg = v;
                                }
                                break;
 
@@ -3024,7 +3002,10 @@
                        case O_PIPE:
                        case O_QUEUE:
                                args->rule = f; /* report matching rule */
-                               args->cookie = cmd->arg1;
+                               if (cmd->arg1 == IP_FW_TABLECOOKIE)
+                                       args->cookie = tablearg;
+                               else
+                                       args->cookie = cmd->arg1;
                                retval = IP_FW_DUMMYNET;
                                goto done;
 
@@ -3045,7 +3026,10 @@
                                }
                                dt = (struct divert_tag *)(mtag+1);
                                dt->cookie = f->rulenum;
-                               dt->info = cmd->arg1;
+                               if (cmd->arg1 == IP_FW_TABLECOOKIE)
+                                       dt->info = tablearg;
+                               else
+                                       dt->info = cmd->arg1;
                                m_tag_prepend(m, mtag);
                                retval = (cmd->opcode == O_DIVERT) ?
                                    IP_FW_DIVERT : IP_FW_TEE;
@@ -3110,7 +3094,10 @@
                        case O_NETGRAPH:
                        case O_NGTEE:
                                args->rule = f; /* report matching rule */
-                               args->cookie = cmd->arg1;
+                               if (cmd->arg1 == IP_FW_TABLECOOKIE)
+                                       args->cookie = tablearg;
+                               else
+                                       args->cookie = cmd->arg1;
                                retval = (cmd->opcode == O_NETGRAPH) ?
                                    IP_FW_NETGRAPH : IP_FW_NGTEE;
                                goto done;
@@ -3169,34 +3156,6 @@
 }
 
 /*
- * When pipes/queues are deleted, clear the "pipe_ptr" pointer to a given
- * pipe/queue, or to all of them (match == NULL).
- */
-void
-flush_pipe_ptrs(struct dn_flow_set *match)
-{
-       struct ip_fw *rule;
-
-       IPFW_WLOCK(&layer3_chain);
-       for (rule = layer3_chain.rules; rule; rule = rule->next) {
-               ipfw_insn_pipe *cmd = (ipfw_insn_pipe *)ACTION_PTR(rule);
-
-               if (cmd->o.opcode != O_PIPE && cmd->o.opcode != O_QUEUE)
-                       continue;
-               /*
-                * XXX Use bcmp/bzero to handle pipe_ptr to overcome
-                * possible alignment problems on 64-bit architectures.
-                * This code is seldom used so we do not worry too
-                * much about efficiency.
-                */
-               if (match == NULL ||
-                   !bcmp(&cmd->pipe_ptr, &match, sizeof(match)) )
-                       bzero(&cmd->pipe_ptr, sizeof(cmd->pipe_ptr));
-       }
-       IPFW_WUNLOCK(&layer3_chain);
-}
-
-/*
  * Add a new rule to the list. Copy the rule into a malloc'ed area, then
  * possibly create a rule number and add the rule to the list.
  * Update the rule_number in the input struct so the caller knows it as well.
@@ -3685,7 +3644,7 @@
 
                case O_PIPE:
                case O_QUEUE:
-                       if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe))
+                       if (cmdlen != F_INSN_SIZE(ipfw_insn))
                                goto bad_size;
                        goto check_action;
 
@@ -4012,8 +3971,8 @@
                            sizeof(ent), sizeof(ent));
                        if (error)
                                break;
-                       error = add_table_entry(ent.tbl, ent.addr,
-                           ent.masklen, ent.value);
+                       error = add_table_entry(&layer3_chain, ent.tbl,
+                           ent.addr, ent.masklen, ent.value);
                }
                break;
 
@@ -4025,7 +3984,8 @@
                            sizeof(ent), sizeof(ent));
                        if (error)
                                break;
-                       error = del_table_entry(ent.tbl, ent.addr, ent.masklen);
+                       error = del_table_entry(&layer3_chain, ent.tbl,
+                           ent.addr, ent.masklen);
                }
                break;
 
@@ -4037,7 +3997,9 @@
                            sizeof(tbl), sizeof(tbl));
                        if (error)
                                break;
-                       error = flush_table(tbl);
+                       IPFW_WLOCK(&layer3_chain);
+                       error = flush_table(&layer3_chain, tbl);
+                       IPFW_WUNLOCK(&layer3_chain);
                }
                break;
 
@@ -4048,8 +4010,10 @@
                        if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl),
                            sizeof(tbl))))
                                break;
-                       if ((error = count_table(tbl, &cnt)))
+                       IPFW_RLOCK(&layer3_chain);
+                       if ((error = count_table(&layer3_chain, tbl, &cnt)))
                                break;
+                       IPFW_RUNLOCK(&layer3_chain);
                        error = sooptcopyout(sopt, &cnt, sizeof(cnt));
                }
                break;
@@ -4075,11 +4039,14 @@
                        }
                        tbl->size = (size - sizeof(*tbl)) /
                            sizeof(ipfw_table_entry);
-                       error = dump_table(tbl);
+                       IPFW_WLOCK(&layer3_chain);
+                       error = dump_table(&layer3_chain, tbl);
                        if (error) {
+                               IPFW_WUNLOCK(&layer3_chain);
                                free(tbl, M_TEMP);
                                break;
                        }
+                       IPFW_WUNLOCK(&layer3_chain);
                        error = sooptcopyout(sopt, tbl, size);
                        free(tbl, M_TEMP);
                }
@@ -4242,7 +4209,7 @@
                printf("limited to %d packets/entry by default\n",
                    verbose_limit);
 
-       init_tables();
+       init_tables(&layer3_chain);
        ip_fw_ctl_ptr = ipfw_ctl;
        ip_fw_chk_ptr = ipfw_chk;
        callout_reset(&ipfw_timeout, hz, ipfw_tick, NULL);
@@ -4259,13 +4226,13 @@
        ip_fw_ctl_ptr = NULL;
        callout_drain(&ipfw_timeout);
        IPFW_WLOCK(&layer3_chain);
+       flush_tables(&layer3_chain);
        layer3_chain.reap = NULL;
        free_chain(&layer3_chain, 1 /* kill default rule */);
        reap = layer3_chain.reap, layer3_chain.reap = NULL;
        IPFW_WUNLOCK(&layer3_chain);
        if (reap != NULL)
                reap_rules(reap);
-       flush_tables();
        IPFW_DYN_LOCK_DESTROY();
        uma_zdestroy(ipfw_dyn_rule_zone);
        IPFW_LOCK_DESTROY(&layer3_chain);
_______________________________________________
freebsd-net@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-net
To unsubscribe, send any mail to "[EMAIL PROTECTED]"

Reply via email to