Hi,

ok, seems we are getting near submission for kernel inclusion. If no new 
comments arise, the only thing missing from my side is documentation. VLAN is 
again included in this patch to show one use case.

Changes since last version:

-remove NETIF_F_STACKED, use dev->iflink instead. Change VLAN to set 
dev->iflink properly
-rename IFF_CARRIER to IFF_LOWER_UP
-reject userspace controlled DORMANT->UP transition if netif_dormant() is set
-add all operstate attributes to sysfs
-vlan_transfer_operstate() sets dormant first, then carrier
-also call rfc2863_policy() when the interface is admin down, to keep operstate 
up to date. This allows vlan_transfer_operstate() to be moved into the VLAN setup 
function. Couldn't get rid of the unconditional call to linkwatch_fire_event(), though
-small fixes

I've successfully tested stacking and userspace interaction, also with drivers 
that do not call netif_carrier_*()

Should we reset link_mode in dev_open(), so that the user can bring the interface 
back into a usable state with a down/up transition if the supplicant crashes for 
some reason? Or should we depend on maintainers/distributors to update userspace 
for this?

Stefan
diff -X dontdiff -ur linux-2.6.14/include/linux/if.h linux-2.6.14-rfc2863/include/linux/if.h
--- linux-2.6.14/include/linux/if.h	2005-11-02 11:07:32.000000000 +0100
+++ linux-2.6.14-rfc2863/include/linux/if.h	2005-11-30 21:15:24.000000000 +0100
@@ -33,7 +33,7 @@
 #define	IFF_LOOPBACK	0x8		/* is a loopback net		*/
 #define	IFF_POINTOPOINT	0x10		/* interface is has p-p link	*/
 #define	IFF_NOTRAILERS	0x20		/* avoid use of trailers	*/
-#define	IFF_RUNNING	0x40		/* interface running and carrier ok */
+#define	IFF_RUNNING	0x40		/* interface RFC2863 OPER_UP	*/
 #define	IFF_NOARP	0x80		/* no ARP protocol		*/
 #define	IFF_PROMISC	0x100		/* receive all packets		*/
 #define	IFF_ALLMULTI	0x200		/* receive all multicast packets*/
@@ -43,12 +43,16 @@
 
 #define IFF_MULTICAST	0x1000		/* Supports multicast		*/
 
-#define IFF_VOLATILE	(IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_MASTER|IFF_SLAVE|IFF_RUNNING)
-
 #define IFF_PORTSEL	0x2000          /* can set media type		*/
 #define IFF_AUTOMEDIA	0x4000		/* auto media select active	*/
 #define IFF_DYNAMIC	0x8000		/* dialup device with changing addresses*/
 
+#define IFF_LOWER_UP	0x10000		/* driver signals L1 up		*/
+#define IFF_DORMANT	0x20000		/* driver signals dormant	*/
+
+#define IFF_VOLATILE	(IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|\
+		IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT)
+
 /* Private (from user) interface flags (netdevice->priv_flags). */
 #define IFF_802_1Q_VLAN 0x1             /* 802.1Q VLAN device.          */
 #define IFF_EBRIDGE	0x2		/* Ethernet bridging device.	*/
@@ -80,6 +84,22 @@
 #define IF_PROTO_FR_ETH_PVC 0x200B
 #define IF_PROTO_RAW    0x200C          /* RAW Socket                   */
 
+/* RFC 2863 operational status; values index operstates[] in net-sysfs.c */
+enum {
+	IF_OPER_UNKNOWN,	/* counted as up by netif_oper_up() (backward compat) */
+	IF_OPER_NOTPRESENT,	/* currently unused */
+	IF_OPER_DOWN,		/* default_operstate(): no carrier, iflink == ifindex */
+	IF_OPER_LOWERLAYERDOWN,	/* default_operstate(): no carrier, iflink != ifindex */
+	IF_OPER_TESTING,	/* currently unused */
+	IF_OPER_DORMANT,	/* carrier ok but dormant, or capped by IF_LINK_MODE_DORMANT */
+	IF_OPER_UP,		/* carrier ok and not dormant */
+};
+
+/* link modes: per-device policy (dev->link_mode) applied by rfc2863_policy() */
+enum {
+	IF_LINK_MODE_DEFAULT,	/* operstate is taken from default_operstate() as is */
+	IF_LINK_MODE_DORMANT,	/* limit upward transition to dormant */
+};
 
 /*
  *	Device mapping structure. I'd just gone off and designed a 
diff -X dontdiff -ur linux-2.6.14/include/linux/netdevice.h linux-2.6.14-rfc2863/include/linux/netdevice.h
--- linux-2.6.14/include/linux/netdevice.h	2005-11-02 11:08:10.000000000 +0100
+++ linux-2.6.14-rfc2863/include/linux/netdevice.h	2005-11-30 21:01:43.000000000 +0100
@@ -230,7 +230,8 @@
 	__LINK_STATE_SCHED,
 	__LINK_STATE_NOCARRIER,
 	__LINK_STATE_RX_SCHED,
-	__LINK_STATE_LINKWATCH_PENDING
+	__LINK_STATE_LINKWATCH_PENDING,
+	__LINK_STATE_DORMANT,
 };
 
 
@@ -334,11 +335,14 @@
 	 */
 
 
-	unsigned short		flags;	/* interface flags (a la BSD)	*/
+	unsigned int		flags;	/* interface flags (a la BSD)	*/
 	unsigned short		gflags;
         unsigned short          priv_flags; /* Like 'flags' but invisible to userspace. */
 	unsigned short		padded;	/* How much padding added by alloc_netdev() */
 
+	unsigned char		operstate; /* RFC2863 operstate */
+	unsigned char		link_mode; /* mapping policy to operstate */
+
 	unsigned		mtu;	/* interface MTU value		*/
 	unsigned short		type;	/* interface hardware type	*/
 	unsigned short		hard_header_len;	/* hardware hdr length	*/
@@ -712,6 +716,10 @@
 /* Carrier loss detection, dial on demand. The functions netif_carrier_on
  * and _off may be called from IRQ context, but it is caller
  * who is responsible for serialization of these calls.
+ *
+ * The name carrier is inappropriate, these functions should really be
+ * called netif_lowerlayer_*() because they represent the state of any
+ * kind of lower layer not just hardware media.
  */
 
 extern void linkwatch_fire_event(struct net_device *dev);
@@ -727,6 +735,29 @@
 
 extern void netif_carrier_off(struct net_device *dev);
 
+static inline void netif_dormant_on(struct net_device *dev)
+{	/* set __LINK_STATE_DORMANT; fire a linkwatch event only if it was clear */
+	if (!test_and_set_bit(__LINK_STATE_DORMANT, &dev->state))
+		linkwatch_fire_event(dev);
+}
+
+static inline void netif_dormant_off(struct net_device *dev)
+{	/* clear __LINK_STATE_DORMANT; fire a linkwatch event only if it was set */
+	if (test_and_clear_bit(__LINK_STATE_DORMANT, &dev->state))
+		linkwatch_fire_event(dev);
+}
+
+static inline int netif_dormant(const struct net_device *dev)
+{	/* non-zero while __LINK_STATE_DORMANT is set in dev->state */
+	return test_bit(__LINK_STATE_DORMANT, &dev->state);
+}
+
+
+static inline int netif_oper_up(const struct net_device *dev)
+{	/* IF_OPER_UNKNOWN is accepted as up for backward compatibility */
+	return dev->operstate == IF_OPER_UP || dev->operstate == IF_OPER_UNKNOWN;
+}
+
 /* Hot-plugging. */
 static inline int netif_device_present(struct net_device *dev)
 {
diff -X dontdiff -ur linux-2.6.14/include/linux/rtnetlink.h linux-2.6.14-rfc2863/include/linux/rtnetlink.h
--- linux-2.6.14/include/linux/rtnetlink.h	2005-11-02 11:08:11.000000000 +0100
+++ linux-2.6.14-rfc2863/include/linux/rtnetlink.h	2005-11-18 20:14:05.000000000 +0100
@@ -733,6 +733,8 @@
 #define IFLA_MAP IFLA_MAP
 	IFLA_WEIGHT,
 #define IFLA_WEIGHT IFLA_WEIGHT
+	IFLA_OPERSTATE,
+	IFLA_LINKMODE,
 	__IFLA_MAX
 };
 
diff -X dontdiff -ur linux-2.6.14/net/8021q/vlan.c linux-2.6.14-rfc2863/net/8021q/vlan.c
--- linux-2.6.14/net/8021q/vlan.c	2005-11-02 11:07:35.000000000 +0100
+++ linux-2.6.14-rfc2863/net/8021q/vlan.c	2005-11-30 22:48:49.000000000 +0100
@@ -68,7 +68,7 @@
 
 /* Bits of netdev state that are propagated from real device to virtual */
 #define VLAN_LINK_STATE_MASK \
-	((1<<__LINK_STATE_PRESENT)|(1<<__LINK_STATE_NOCARRIER))
+	((1<<__LINK_STATE_PRESENT)|(1<<__LINK_STATE_NOCARRIER)|(1<<__LINK_STATE_DORMANT))
 
 /* End of global variables definitions. */
 
@@ -343,6 +343,26 @@
 	new_dev->do_ioctl = vlan_dev_ioctl;
 }
 
+static void vlan_transfer_operstate(const struct net_device *dev, struct net_device *vlandev)
+{
+	/* Copy link state from real device dev to vlandev. Dormant is
+	 * taken from dev->operstate to respect a userspace enforced
+	 * dormant state of the real device; a supplicant may still run
+	 * on the VLAN device. */
+	if (dev->operstate == IF_OPER_DORMANT)
+		netif_dormant_on(vlandev);
+	else
+		netif_dormant_off(vlandev);
+	/* Sync carrier; the on/off helpers are only called on a mismatch. */
+	if (netif_carrier_ok(dev)) {
+		if (!netif_carrier_ok(vlandev))
+			netif_carrier_on(vlandev);
+	} else {
+		if (netif_carrier_ok(vlandev))
+			netif_carrier_off(vlandev);
+	}
+}
+
 /*  Attach a VLAN device to a mac address (ie Ethernet Card).
  *  Returns the device that was created, or NULL if there was
  *  an error of some kind.
@@ -449,7 +469,7 @@
 	new_dev->flags = real_dev->flags;
 	new_dev->flags &= ~IFF_UP;
 
-	new_dev->state = real_dev->state & VLAN_LINK_STATE_MASK;
+	new_dev->state = real_dev->state & ~(1<<__LINK_STATE_START);
 
 	/* need 4 bytes for extra VLAN header info,
 	 * hope the underlying device can handle it.
@@ -497,6 +517,10 @@
 	if (register_netdevice(new_dev))
 		goto out_free_newdev;
 
+	new_dev->iflink = real_dev->ifindex;
+	vlan_transfer_operstate(real_dev, new_dev);
+	linkwatch_fire_event(new_dev); /* _MUST_ call rfc2863_policy() */
+	
 	/* So, got the sucker initialized, now lets place
 	 * it into our local structure.
 	 */
@@ -572,25 +596,12 @@
 	switch (event) {
 	case NETDEV_CHANGE:
 		/* Propagate real device state to vlan devices */
-		flgs = dev->state & VLAN_LINK_STATE_MASK;
 		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
 			vlandev = grp->vlan_devices[i];
 			if (!vlandev)
 				continue;
 
-			if (netif_carrier_ok(dev)) {
-				if (!netif_carrier_ok(vlandev))
-					netif_carrier_on(vlandev);
-			} else {
-				if (netif_carrier_ok(vlandev))
-					netif_carrier_off(vlandev);
-			}
-
-			if ((vlandev->state & VLAN_LINK_STATE_MASK) != flgs) {
-				vlandev->state = (vlandev->state &~ VLAN_LINK_STATE_MASK) 
-					| flgs;
-				netdev_state_change(vlandev);
-			}
+			vlan_transfer_operstate(dev, vlandev);
 		}
 		break;
 
diff -X dontdiff -ur linux-2.6.14/net/core/dev.c linux-2.6.14-rfc2863/net/core/dev.c
--- linux-2.6.14/net/core/dev.c	2005-11-06 17:35:22.000000000 +0100
+++ linux-2.6.14-rfc2863/net/core/dev.c	2005-11-30 21:15:50.000000000 +0100
@@ -2141,12 +2141,20 @@
 
 	flags = (dev->flags & ~(IFF_PROMISC |
 				IFF_ALLMULTI |
-				IFF_RUNNING)) | 
+				IFF_RUNNING |
+				IFF_LOWER_UP |
+				IFF_DORMANT)) | 
 		(dev->gflags & (IFF_PROMISC |
 				IFF_ALLMULTI));
 
-	if (netif_running(dev) && netif_carrier_ok(dev))
-		flags |= IFF_RUNNING;
+	if (netif_running(dev)) {
+		if (netif_oper_up(dev))
+			flags |= IFF_RUNNING;
+		if (netif_carrier_ok(dev))
+			flags |= IFF_LOWER_UP;
+		if (netif_dormant(dev))
+			flags |= IFF_DORMANT;
+	}		
 
 	return flags;
 }
diff -X dontdiff -ur linux-2.6.14/net/core/link_watch.c linux-2.6.14-rfc2863/net/core/link_watch.c
--- linux-2.6.14/net/core/link_watch.c	2005-06-17 21:48:29.000000000 +0200
+++ linux-2.6.14-rfc2863/net/core/link_watch.c	2005-11-30 21:13:53.000000000 +0100
@@ -49,6 +49,34 @@
 /* Avoid kmalloc() for most systems */
 static struct lw_event singleevent;
 
+static inline unsigned char default_operstate(const struct net_device *dev) {	/* operstate implied by the carrier and dormant bits */
+	if (!netif_carrier_ok(dev))	/* no carrier: LOWERLAYERDOWN when iflink differs from ifindex, else DOWN */
+		return dev->ifindex!=dev->iflink?IF_OPER_LOWERLAYERDOWN:IF_OPER_DOWN;
+	if (netif_dormant(dev)) return IF_OPER_DORMANT;	/* carrier ok but dormant bit set */
+	return IF_OPER_UP;
+}	
+
+
+static void rfc2863_policy(struct net_device *dev) {	/* refresh dev->operstate from link state and dev->link_mode */
+	unsigned char operstate = default_operstate(dev);
+	/* compared before the link_mode mapping: a state already equal to the default is kept */
+	if (operstate == dev->operstate) return;
+	/* link_mode may cap the new state */
+	switch(dev->link_mode) {
+	case IF_LINK_MODE_DORMANT:
+		if (operstate == IF_OPER_UP) operstate = IF_OPER_DORMANT;
+		break;
+	case IF_LINK_MODE_DEFAULT:
+	default:
+		break;
+	}
+	/* store the result under the dev_base_lock write lock */
+	write_lock_bh(&dev_base_lock);
+	dev->operstate = operstate;
+	write_unlock_bh(&dev_base_lock);
+}
+
+	
 /* Must be called with the rtnl semaphore held */
 void linkwatch_run_queue(void)
 {
@@ -74,6 +102,7 @@
 		 */
 		clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
 
+		rfc2863_policy(dev);
 		if (dev->flags & IFF_UP) {
 			if (netif_carrier_ok(dev)) {
 				WARN_ON(dev->qdisc_sleeping == &noop_qdisc);
diff -X dontdiff -ur linux-2.6.14/net/core/net-sysfs.c linux-2.6.14-rfc2863/net/core/net-sysfs.c
--- linux-2.6.14/net/core/net-sysfs.c	2005-06-17 21:48:29.000000000 +0200
+++ linux-2.6.14-rfc2863/net/core/net-sysfs.c	2005-11-28 21:33:44.000000000 +0100
@@ -94,6 +94,7 @@
 NETDEVICE_ATTR(ifindex, fmt_dec);
 NETDEVICE_ATTR(features, fmt_long_hex);
 NETDEVICE_ATTR(type, fmt_dec);
+NETDEVICE_ATTR(link_mode, fmt_dec);
 
 /* use same locking rules as GIFHWADDR ioctl's */
 static ssize_t format_addr(char *buf, const unsigned char *addr, int len)
@@ -136,9 +137,44 @@
 	return -EINVAL;
 }
 
+static ssize_t show_dormant(struct class_device *dev, char *buf)
+{	/* sysfs "dormant" read: prints netif_dormant() normalized to 0/1 using fmt_dec */
+	struct net_device *netdev = to_net_dev(dev);
+	if (netif_running(netdev)) {
+		return sprintf(buf, fmt_dec, !!netif_dormant(netdev));
+	}
+	return -EINVAL;	/* device is not running */
+}
+
+static const char *operstates[] = {	/* indexed by IF_OPER_* value */
+	"unknown",
+	NULL, /* notpresent, currently unused */
+	"down",
+	"lowerlayerdown",
+	NULL, /* testing, currently unused */
+	"dormant",
+	"up"
+};
+/* sysfs "operstate" read: RFC2863 state name; "down" whenever !netif_running() */
+static ssize_t show_operstate(struct class_device *dev, char *buf)
+{
+	const struct net_device *netdev = to_net_dev(dev);
+	unsigned char operstate;
+
+	read_lock(&dev_base_lock);
+	operstate = netdev->operstate;
+	if (!netif_running(netdev)) operstate = IF_OPER_DOWN;
+	read_unlock(&dev_base_lock);
+	if (operstate >= sizeof(operstates) / sizeof(operstates[0]) ||	/* element count, not byte size */
+	    !operstates[operstate]) return -EINVAL;	/* out of range or unnamed state; should not happen */
+	return sprintf(buf, "%s\n", operstates[operstate]);
+}
+
 static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
 static CLASS_DEVICE_ATTR(broadcast, S_IRUGO, show_broadcast, NULL);
 static CLASS_DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL);
+static CLASS_DEVICE_ATTR(dormant, S_IRUGO, show_dormant, NULL);
+static CLASS_DEVICE_ATTR(operstate, S_IRUGO, show_operstate, NULL);
 
 /* read-write attributes */
 NETDEVICE_SHOW(mtu, fmt_dec);
@@ -212,9 +248,12 @@
 	&class_device_attr_flags,
 	&class_device_attr_weight,
 	&class_device_attr_type,
+	&class_device_attr_link_mode,
 	&class_device_attr_address,
 	&class_device_attr_broadcast,
 	&class_device_attr_carrier,
+	&class_device_attr_dormant,
+	&class_device_attr_operstate,
 	NULL
 };
 
diff -X dontdiff -ur linux-2.6.14/net/core/rtnetlink.c linux-2.6.14-rfc2863/net/core/rtnetlink.c
--- linux-2.6.14/net/core/rtnetlink.c	2005-11-02 11:08:12.000000000 +0100
+++ linux-2.6.14-rfc2863/net/core/rtnetlink.c	2005-11-30 22:13:02.000000000 +0100
@@ -178,6 +178,32 @@
 }
 
 
+static void set_operstate(struct net_device *dev, unsigned char transition) {
+	unsigned char operstate = dev->operstate;
+	ASSERT_RTNL();
+	/* Apply an operstate change requested via IFLA_OPERSTATE; other transition values are ignored. */
+	switch(transition) {
+	case IF_OPER_UP:	/* only from DORMANT/UNKNOWN, and never while netif_dormant() is set */
+		if ((operstate == IF_OPER_DORMANT ||
+		     operstate == IF_OPER_UNKNOWN) &&
+		    !netif_dormant(dev))
+			operstate = IF_OPER_UP;
+		break;
+	case IF_OPER_DORMANT:	/* only from UP/UNKNOWN */
+		if (operstate == IF_OPER_UP ||
+		    operstate == IF_OPER_UNKNOWN)
+			operstate = IF_OPER_DORMANT;
+		break;
+	}
+	/* write (under dev_base_lock) and notify only on an actual change */
+	if (dev->operstate != operstate) {
+		write_lock_bh(&dev_base_lock);
+		dev->operstate = operstate;
+		write_unlock_bh(&dev_base_lock);
+		netdev_state_change(dev);
+	}	
+}
+
 static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 				 int type, u32 pid, u32 seq, u32 change, 
 				 unsigned int flags)
@@ -208,6 +234,13 @@
 	}
 
 	if (1) {
+		u8 operstate = dev->operstate;
+		u8 link_mode = dev->link_mode;
+		RTA_PUT(skb, IFLA_OPERSTATE, sizeof(operstate), &operstate);
+		RTA_PUT(skb, IFLA_LINKMODE, sizeof(link_mode), &link_mode);
+	}	
+
+	if (1) {
 		struct rtnl_link_ifmap map = {
 			.mem_start   = dev->mem_start,
 			.mem_end     = dev->mem_end,
@@ -398,6 +431,22 @@
 		dev->weight = *((u32 *) RTA_DATA(ida[IFLA_WEIGHT - 1]));
 	}
 
+	if (ida[IFLA_OPERSTATE - 1]) {
+		if (ida[IFLA_OPERSTATE - 1]->rta_len != RTA_LENGTH(sizeof(u8)))
+			goto out;
+
+		set_operstate(dev, *((u8 *) RTA_DATA(ida[IFLA_OPERSTATE - 1])));
+	}	
+
+	if (ida[IFLA_LINKMODE - 1]) {
+		if (ida[IFLA_LINKMODE - 1]->rta_len != RTA_LENGTH(sizeof(u8)))
+			goto out;
+
+		write_lock_bh(&dev_base_lock);
+		dev->link_mode = *((u8 *) RTA_DATA(ida[IFLA_LINKMODE - 1]));
+		write_unlock_bh(&dev_base_lock);
+	}
+
 	if (ifm->ifi_index >= 0 && ida[IFLA_IFNAME - 1]) {
 		char ifname[IFNAMSIZ];
 

Reply via email to