Dave,
Here is the final patch, incorporating Henner's feedback on the naming
convention of the return codes. Clean it up, polish it, junk it, etc.
I'd also like to send you a large patch, or a series of patches, that makes
the protocols use the NET_RX_* codes. An example patch:
--------------------------------------------------------
--- ip_input.c 2000/09/23 12:48:56 1.1
+++ ip_input.c 2000/09/23 12:52:52
@@ -341,7 +341,7 @@
skb = skb_cow(skb, skb_headroom(skb));
if (skb == NULL)
- return 0;
+ return NET_RX_DROP;
iph = skb->nh.iph;
skb->ip_summed = 0;
@@ -372,7 +372,7 @@
IP_INC_STATS_BH(IpInHdrErrors);
drop:
kfree_skb(skb);
- return(0);
+ return NET_RX_DROP;
}
/*
@@ -429,6 +429,6 @@
drop:
kfree_skb(skb);
out:
- return(0);
+ return NET_RX_DROP;
}
------------------------------------------------------
I realize nobody is using those codes but they would be useful and will
enforce consistency.
cheers,
jamal
--- linux/net/core/dev.c.orig Thu Sep 7 11:32:01 2000
+++ linux/net/core/dev.c Tue Sep 12 20:02:20 2000
@@ -59,6 +59,8 @@
* Paul Rusty Russell : SIOCSIFNAME
* Pekka Riikonen : Netdev boot-time settings code
* Andrew Morton : Make unregister_netdevice wait indefinitely
on dev->refcnt
+ * J Hadi Salim : - Backlog queue sampling
+ * - netif_rx() feedback
*/
#include <asm/uaccess.h>
@@ -97,6 +99,9 @@
extern int plip_init(void);
#endif
+/*
+#define RAND_LIE 1
+*/
NET_PROFILE_DEFINE(dev_queue_xmit)
NET_PROFILE_DEFINE(softnet_process)
@@ -133,6 +138,11 @@
static struct packet_type *ptype_base[16]; /* 16 way hashed list */
static struct packet_type *ptype_all = NULL; /* Taps */
+#ifdef offline_samp /* same guard as sample_queue() and the add_timer() call in net_dev_init() */
+static void sample_queue(unsigned long dummy); /* timer handler, defined further down */
+static struct timer_list samp_timer = { { NULL, NULL }, 0, 0, &sample_queue }; /* backlog-sampling timer; its handler is sample_queue() */
+#endif
+
/*
* Our notifier list
*/
@@ -951,12 +961,25 @@
=======================================================================*/
int netdev_max_backlog = 300;
+/* These numbers were selected based on intuition and some
+experimentation; if you have a more scientific way of doing this,
+please go ahead and fix things.
+*/
+int no_cong_thresh=10;
+int no_cong=20;
+int lo_cong=100;
+int mod_cong=290;
+
+
struct netif_rx_stats netdev_rx_stat[NR_CPUS];
#ifdef CONFIG_NET_HW_FLOWCONTROL
+/*
static atomic_t netdev_dropping = ATOMIC_INIT(0);
+*/
+atomic_t netdev_dropping = ATOMIC_INIT(0);
static unsigned long netdev_fc_mask = 1;
unsigned long netdev_fc_xoff = 0;
spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED;
@@ -1014,6 +1037,56 @@
}
#endif
+static void get_sample_stats(int cpu) /* refresh softnet_data[cpu]'s smoothed backlog average and its congestion level */
+{
+#ifdef RAND_LIE /* scratch variables for the optional randomised escalation below */
+ unsigned long rd;
+ int rq;
+#endif
+ int blog=softnet_data[cpu].input_pkt_queue.qlen;
+ int avg_blog=softnet_data[cpu].avg_blog;
+
+ avg_blog=(avg_blog >> 1)+ (blog>>1); /* new average = half the old average + half the current queue length */
+
+ if (avg_blog > mod_cong) {
+/* average exceeds mod_cong: report high congestion */
+ softnet_data[cpu].cng_level= NET_RX_CN_HIGH;
+#ifdef RAND_LIE
+ rd=net_random();
+ rq=rd% netdev_max_backlog;
+ if (rq < avg_blog) /* random draw fell below the average: escalate to NET_RX_DROP */
+ softnet_data[cpu].cng_level=NET_RX_DROP;
+#endif
+ } else if (avg_blog > lo_cong) { /* above lo_cong but not above mod_cong: moderate congestion */
+ softnet_data[cpu].cng_level= NET_RX_CN_MOD;
+#ifdef RAND_LIE
+ rd=net_random();
+ rq=rd% netdev_max_backlog;
+ if (rq < avg_blog) /* random draw fell below the average: escalate to NET_RX_CN_HIGH */
+ softnet_data[cpu].cng_level=NET_RX_CN_HIGH;
+#endif
+ } else if (avg_blog > no_cong) /* above no_cong but not above lo_cong: low congestion */
+ softnet_data[cpu].cng_level= NET_RX_CN_LOW;
+ else /* average at or below no_cong: no congestion */
+ softnet_data[cpu].cng_level=NET_RX_SUCCESS;
+
+ softnet_data[cpu].avg_blog=avg_blog; /* store the average for the next sample */
+
+}
+
+#ifdef offline_samp
+static void sample_queue(unsigned long dummy) /* timer handler: sample the current CPU's backlog, then re-arm samp_timer; dummy is unused */
+{
+/* re-arm one jiffy ahead; whether that is 10 ms or 1 ms does not matter -- JHS */
+ int next_tick=1;
+ int cpu=smp_processor_id();
+ get_sample_stats(cpu);
+ next_tick+=jiffies;
+ mod_timer(&samp_timer, next_tick);
+}
+#endif
+
+
/**
* netif_rx - post buffer to the network code
* @skb: buffer to post
@@ -1022,9 +1095,18 @@
* the upper (protocol) levels to process. It always succeeds. The buffer
* may be dropped during processing for congestion control or by the
* protocol layers.
+ *
+ * return values:
+ * NET_RX_SUCCESS (no congestion)
+ * NET_RX_CN_LOW (low congestion)
+ * NET_RX_CN_MOD (moderate congestion)
+ * NET_RX_CN_HIGH (high congestion)
+ * NET_RX_DROP (packet was dropped)
+ *
+ *
*/
-void netif_rx(struct sk_buff *skb)
+int netif_rx(struct sk_buff *skb)
{
int this_cpu = smp_processor_id();
struct softnet_data *queue;
@@ -1043,6 +1125,7 @@
netdev_rx_stat[this_cpu].total++;
if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
if (queue->input_pkt_queue.qlen) {
+
if (queue->throttle)
goto drop;
@@ -1054,11 +1137,16 @@
__skb_queue_tail(&queue->input_pkt_queue,skb);
__cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);
local_irq_restore(flags);
- return;
+#ifndef offline_samp
+ get_sample_stats(this_cpu);
+#endif
+ return softnet_data[this_cpu].cng_level;
+
}
if (queue->throttle) {
queue->throttle = 0;
+
#ifdef CONFIG_NET_HW_FLOWCONTROL
if (atomic_dec_and_test(&netdev_dropping))
netdev_wakeup();
@@ -1080,6 +1168,8 @@
local_irq_restore(flags);
kfree_skb(skb);
+ return NET_RX_DROP;
+
}
/* Deliver skb to an old protocol, which is not threaded well
@@ -1292,12 +1382,28 @@
if (bugdet-- < 0 || jiffies - start_time > 1)
goto softnet_break;
+
+/*
+*/
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+ if (queue->throttle && queue->input_pkt_queue.qlen < no_cong_thresh ) {
+ if (atomic_dec_and_test(&netdev_dropping)) {
+ queue->throttle = 0;
+ netdev_wakeup();
+ goto softnet_break;
+ }
+
+ }
+#endif
+
+
}
br_read_unlock(BR_NETPROTO_LOCK);
local_irq_disable();
if (queue->throttle) {
queue->throttle = 0;
+
#ifdef CONFIG_NET_HW_FLOWCONTROL
if (atomic_dec_and_test(&netdev_dropping))
netdev_wakeup();
@@ -2418,6 +2524,7 @@
int __init net_dev_init(void)
{
struct net_device *dev, **dp;
+
int i;
#ifdef CONFIG_NET_SCHED
@@ -2434,6 +2541,8 @@
queue = &softnet_data[i];
skb_queue_head_init(&queue->input_pkt_queue);
queue->throttle = 0;
+ queue->cng_level = 0;
+ queue->avg_blog = 10; /* arbitrary non-zero */
queue->completion_queue = NULL;
}
@@ -2442,6 +2551,12 @@
NET_PROFILE_REGISTER(dev_queue_xmit);
NET_PROFILE_REGISTER(softnet_process);
#endif
+
+#ifdef offline_samp
+ samp_timer.expires=jiffies+(10 * HZ);
+ add_timer(&samp_timer);
+#endif
+
/*
* Add the devices.
* If the call to dev->init fails, the dev is removed
@@ -2499,6 +2614,7 @@
#ifdef CONFIG_PROC_FS
proc_net_create("dev", 0, dev_get_info);
create_proc_read_entry("net/softnet_stat", 0, 0, dev_proc_stats, NULL);
+
#ifdef WIRELESS_EXT
proc_net_create("wireless", 0, dev_get_wireless_info);
#endif /* WIRELESS_EXT */
--- linux/include/linux/netdevice.h.orig Fri Sep 8 15:53:53 2000
+++ linux/include/linux/netdevice.h Tue Sep 12 20:01:25 2000
@@ -47,6 +47,13 @@
(TC use only - dev_queue_xmit
returns this as NET_XMIT_SUCCESS) */
+/* Backlog congestion levels */
+#define NET_RX_SUCCESS 0 /* keep 'em coming, baby */
+#define NET_RX_CN_LOW 1 /* storm alert, just in case */
+#define NET_RX_CN_MOD 2 /* Storm on its way! */
+#define NET_RX_CN_HIGH 5 /* The storm is here */
+#define NET_RX_DROP -1 /* packet dropped */
+
#define net_xmit_errno(e) ((e) != NET_XMIT_CN ? -ENOBUFS : 0)
#endif
@@ -440,6 +447,8 @@
struct softnet_data
{
int throttle;
+ int cng_level;
+ int avg_blog;
struct sk_buff_head input_pkt_queue;
struct net_device *output_queue;
struct sk_buff *completion_queue;
@@ -526,7 +535,7 @@
extern void net_call_rx_atomic(void (*fn)(void));
#define HAVE_NETIF_RX 1
-extern void netif_rx(struct sk_buff *skb);
+extern int netif_rx(struct sk_buff *skb);
extern int dev_ioctl(unsigned int cmd, void *);
extern int dev_change_flags(struct net_device *, unsigned);
extern void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device
*dev);
@@ -628,6 +637,7 @@
extern void netdev_unregister_fc(int bit);
extern int netdev_max_backlog;
extern unsigned long netdev_fc_xoff;
+extern atomic_t netdev_dropping;
extern int netdev_set_master(struct net_device *dev, struct net_device
*master);
#ifdef CONFIG_NET_FASTROUTE
extern int netdev_fastroute;
--- /usr/src/linux/net/netsyms.c 2000/09/16 23:00:33 1.1
+++ /usr/src/linux/net/netsyms.c 2000/09/16 23:01:28
@@ -490,6 +490,7 @@
EXPORT_SYMBOL(dev_ioctl);
EXPORT_SYMBOL(dev_queue_xmit);
#ifdef CONFIG_NET_HW_FLOWCONTROL
+EXPORT_SYMBOL(netdev_dropping);
EXPORT_SYMBOL(netdev_register_fc);
EXPORT_SYMBOL(netdev_unregister_fc);
EXPORT_SYMBOL(netdev_fc_xoff);