On 5/17/2018 9:08 PM, Tom Herbert wrote: > On Tue, May 15, 2018 at 6:26 PM, Amritha Nambiar > <amritha.namb...@intel.com> wrote: >> Refactor XPS code to support Tx queue selection based on >> CPU map or Rx queue map. >> >> Signed-off-by: Amritha Nambiar <amritha.namb...@intel.com> >> --- >> include/linux/cpumask.h | 11 ++ >> include/linux/netdevice.h | 72 +++++++++++++++- >> net/core/dev.c | 208 >> +++++++++++++++++++++++++++++---------------- >> net/core/net-sysfs.c | 4 - >> 4 files changed, 215 insertions(+), 80 deletions(-) >> >> diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h >> index bf53d89..57f20a0 100644 >> --- a/include/linux/cpumask.h >> +++ b/include/linux/cpumask.h >> @@ -115,12 +115,17 @@ extern struct cpumask __cpu_active_mask; >> #define cpu_active(cpu) ((cpu) == 0) >> #endif >> >> -/* verify cpu argument to cpumask_* operators */ >> -static inline unsigned int cpumask_check(unsigned int cpu) >> +static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits) >> { >> #ifdef CONFIG_DEBUG_PER_CPU_MAPS >> - WARN_ON_ONCE(cpu >= nr_cpumask_bits); >> + WARN_ON_ONCE(cpu >= bits); >> #endif /* CONFIG_DEBUG_PER_CPU_MAPS */ >> +} >> + >> +/* verify cpu argument to cpumask_* operators */ >> +static inline unsigned int cpumask_check(unsigned int cpu) >> +{ >> + cpu_max_bits_warn(cpu, nr_cpumask_bits); >> return cpu; >> } >> >> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h >> index 03ed492..c2eeb36 100644 >> --- a/include/linux/netdevice.h >> +++ b/include/linux/netdevice.h >> @@ -730,10 +730,21 @@ struct xps_map { >> */ >> struct xps_dev_maps { >> struct rcu_head rcu; >> - struct xps_map __rcu *cpu_map[0]; >> + struct xps_map __rcu *attr_map[0]; >> }; >> -#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) + \ >> + >> +#define XPS_CPU_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) + \ >> (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *))) >> + >> +#define XPS_RXQ_DEV_MAPS_SIZE(_tcs, _rxqs) (sizeof(struct xps_dev_maps) +\ >> + (_rxqs * (_tcs) * sizeof(struct xps_map *))) >> + >> +enum xps_map_type { >> + XPS_MAP_RXQS, >> + XPS_MAP_CPUS, >> + __XPS_MAP_MAX >> +}; >> + >> #endif /* CONFIG_XPS */ >> >> #define TC_MAX_QUEUE 16 >> @@ -1891,7 +1902,7 @@ struct net_device { >> int watchdog_timeo; >> >> #ifdef CONFIG_XPS >> - struct xps_dev_maps __rcu *xps_maps; >> + struct xps_dev_maps __rcu *xps_maps[__XPS_MAP_MAX]; >> #endif >> #ifdef CONFIG_NET_CLS_ACT >> struct mini_Qdisc __rcu *miniq_egress; >> @@ -3229,6 +3240,61 @@ static inline void netif_wake_subqueue(struct >> net_device *dev, u16 queue_index) >> #ifdef CONFIG_XPS >> int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, >> u16 index); >> +int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, >> + u16 index, enum xps_map_type type); >> + >> +static inline bool attr_test_mask(unsigned long j, const unsigned long >> *mask, >> + unsigned int nr_bits) >> +{ >> + cpu_max_bits_warn(j, nr_bits); >> + return test_bit(j, mask); >> +} >> + >> +static inline bool attr_test_online(unsigned long j, >> + const unsigned long *online_mask, >> + unsigned int nr_bits) >> +{ >> + cpu_max_bits_warn(j, nr_bits); >> + >> + if (online_mask) >> + return test_bit(j, online_mask); >> + >> + if (j >= 0 && j < nr_bits) >> + return true; >> + >> + return false; >> +} >> + >> +static inline unsigned int attrmask_next(int n, const unsigned long *srcp, >> + unsigned int nr_bits) >> +{ >> + /* -1 is a legal arg here. */ >> + if (n != -1) >> + cpu_max_bits_warn(n, nr_bits); >> + >> + if (srcp) >> + return find_next_bit(srcp, nr_bits, n + 1); >> + >> + return n + 1; >> +} >> + >> +static inline int attrmask_next_and(int n, const unsigned long *src1p, >> + const unsigned long *src2p, >> + unsigned int nr_bits) >> +{ >> + /* -1 is a legal arg here. */ >> + if (n != -1) >> + cpu_max_bits_warn(n, nr_bits); >> + >> + if (src1p && src2p) >> + return find_next_and_bit(src1p, src2p, nr_bits, n + 1); >> + else if (src1p) >> + return find_next_bit(src1p, nr_bits, n + 1); >> + else if (src2p) >> + return find_next_bit(src2p, nr_bits, n + 1); >> + >> + return n + 1; >> +} >> #else >> static inline int netif_set_xps_queue(struct net_device *dev, >> const struct cpumask *mask, >> diff --git a/net/core/dev.c b/net/core/dev.c >> index 9f43901..7e5dfdb 100644 >> --- a/net/core/dev.c >> +++ b/net/core/dev.c >> @@ -2092,7 +2092,7 @@ static bool remove_xps_queue(struct xps_dev_maps >> *dev_maps, >> int pos; >> >> if (dev_maps) >> - map = xmap_dereference(dev_maps->cpu_map[tci]); >> + map = xmap_dereference(dev_maps->attr_map[tci]); >> if (!map) >> return false; >> >> @@ -2105,7 +2105,7 @@ static bool remove_xps_queue(struct xps_dev_maps >> *dev_maps, >> break; >> } >> >> - RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL); >> + RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL); >> kfree_rcu(map, rcu); >> return false; >> } >> @@ -2125,7 +2125,7 @@ static bool remove_xps_queue_cpu(struct net_device >> *dev, >> int i, j; >> >> for (i = count, j = offset; i--; j++) { >> - if (!remove_xps_queue(dev_maps, cpu, j)) >> + if (!remove_xps_queue(dev_maps, tci, j)) >> break; >> } >> >> @@ -2138,30 +2138,47 @@ static bool remove_xps_queue_cpu(struct net_device >> *dev, >> static void netif_reset_xps_queues(struct net_device *dev, u16 offset, >> u16 count) >> { >> + const unsigned long *possible_mask = NULL; >> + enum xps_map_type type = XPS_MAP_RXQS; >> struct xps_dev_maps *dev_maps; >> - int cpu, i; >> bool active = false; >> + unsigned int nr_ids; >> + int i, j; >> >> mutex_lock(&xps_map_mutex); >> - dev_maps = xmap_dereference(dev->xps_maps); >> >> - if (!dev_maps) >> - goto out_no_maps; >> + while (type < __XPS_MAP_MAX) { >> + dev_maps = xmap_dereference(dev->xps_maps[type]); >> + if (!dev_maps) >> + goto out_no_maps; >> + >> + if (type == XPS_MAP_CPUS) { >> + if (num_possible_cpus() > 1) >> + possible_mask = >> cpumask_bits(cpu_possible_mask); >> + nr_ids = nr_cpu_ids; >> + } else if (type == XPS_MAP_RXQS) { >> + nr_ids = dev->num_rx_queues; >> + } > type is an enum so this should be a switch
Will fix in v3. > >> >> - for_each_possible_cpu(cpu) >> - active |= remove_xps_queue_cpu(dev, dev_maps, cpu, >> - offset, count); >> + for (j = -1; j = attrmask_next(j, possible_mask, nr_ids), >> + j < nr_ids;) >> + active |= remove_xps_queue_cpu(dev, dev_maps, j, >> offset, >> + count); >> + if (!active) { >> + RCU_INIT_POINTER(dev->xps_maps[type], NULL); >> + kfree_rcu(dev_maps, rcu); >> + } >> >> - if (!active) { >> - RCU_INIT_POINTER(dev->xps_maps, NULL); >> - kfree_rcu(dev_maps, rcu); >> + if (type == XPS_MAP_CPUS) { >> + for (i = offset + (count - 1); count--; i--) >> + netdev_queue_numa_node_write( >> + netdev_get_tx_queue(dev, i), >> + NUMA_NO_NODE); >> + } >> +out_no_maps: >> + type++; >> } >> >> - for (i = offset + (count - 1); count--; i--) >> - netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i), >> - NUMA_NO_NODE); >> - >> -out_no_maps: >> mutex_unlock(&xps_map_mutex); >> } >> >> @@ -2170,11 +2187,11 @@ static void netif_reset_xps_queues_gt(struct >> net_device *dev, u16 index) >> netif_reset_xps_queues(dev, index, dev->num_tx_queues - index); >> } >> >> -static struct xps_map *expand_xps_map(struct xps_map *map, >> - int cpu, u16 index) >> +static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index, >> + u16 index, enum xps_map_type type) >> { >> - struct xps_map *new_map; >> int alloc_len = XPS_MIN_MAP_ALLOC; >> + struct xps_map *new_map = NULL; >> int i, pos; >> >> for (pos = 0; map && pos < map->len; pos++) { >> @@ -2183,7 +2200,7 @@ static struct xps_map *expand_xps_map(struct xps_map >> *map, >> return map; >> } >> >> - /* Need to add queue to this CPU's existing map */ >> + /* Need to add tx-queue to this CPU's/rx-queue's existing map */ >> if (map) { >> if (pos < map->alloc_len) >> return map; >> @@ -2191,9 +2208,14 @@ static struct xps_map *expand_xps_map(struct xps_map >> *map, >> alloc_len = map->alloc_len * 2; >> } >> >> - /* Need to allocate new map to store queue on this CPU's map */ >> - new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL, >> - cpu_to_node(cpu)); >> + /* Need to allocate new map to store tx-queue on this >> CPU's/rx-queue's >> + * map >> + */ >> + if (type == XPS_MAP_RXQS) >> + new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL); >> + else if (type == XPS_MAP_CPUS) >> + new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL, >> + cpu_to_node(attr_index)); > switch here also Will fix in v3. > >> if (!new_map) >> return NULL; >> >> @@ -2205,14 +2227,16 @@ static struct xps_map *expand_xps_map(struct xps_map >> *map, >> return new_map; >> } >> >> -int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, >> - u16 index) >> +int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, >> + u16 index, enum xps_map_type type) >> { >> + const unsigned long *online_mask = NULL, *possible_mask = NULL; >> struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; >> - int i, cpu, tci, numa_node_id = -2; >> + int i, j, tci, numa_node_id = -2; >> int maps_sz, num_tc = 1, tc = 0; >> struct xps_map *map, *new_map; >> bool active = false; >> + unsigned int nr_ids; >> >> if (dev->num_tc) { >> num_tc = dev->num_tc; >> @@ -2221,16 +2245,33 @@ int netif_set_xps_queue(struct net_device *dev, >> const struct cpumask *mask, >> return -EINVAL; >> } >> >> - maps_sz = XPS_DEV_MAPS_SIZE(num_tc); >> + switch (type) { >> + case XPS_MAP_RXQS: >> + maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues); >> + dev_maps = xmap_dereference(dev->xps_maps[XPS_MAP_RXQS]); >> + nr_ids = dev->num_rx_queues; >> + break; >> + case XPS_MAP_CPUS: >> + maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc); >> + if (num_possible_cpus() > 1) { >> + online_mask = cpumask_bits(cpu_online_mask); >> + possible_mask = cpumask_bits(cpu_possible_mask); >> + } >> + dev_maps = xmap_dereference(dev->xps_maps[XPS_MAP_CPUS]); >> + nr_ids = nr_cpu_ids; >> + break; >> + default: >> + return -EINVAL; >> + } >> + >> if (maps_sz < L1_CACHE_BYTES) >> maps_sz = L1_CACHE_BYTES; >> >> mutex_lock(&xps_map_mutex); >> >> - dev_maps = xmap_dereference(dev->xps_maps); >> - >> /* allocate memory for queue storage */ >> - for_each_cpu_and(cpu, cpu_online_mask, mask) { >> + for (j = -1; j = attrmask_next_and(j, online_mask, mask, nr_ids), >> + j < nr_ids;) { >> if (!new_dev_maps) >> new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); >> if (!new_dev_maps) { >> @@ -2238,73 +2279,81 @@ int netif_set_xps_queue(struct net_device *dev, >> const struct cpumask *mask, >> return -ENOMEM; >> } >> >> - tci = cpu * num_tc + tc; >> - map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) : >> + tci = j * num_tc + tc; >> + map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) : >> NULL; >> >> - map = expand_xps_map(map, cpu, index); >> + map = expand_xps_map(map, j, index, type); >> if (!map) >> goto error; >> >> - RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); >> + RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); >> } >> >> if (!new_dev_maps) >> goto out_no_new_maps; >> >> - for_each_possible_cpu(cpu) { >> + for (j = -1; j = attrmask_next(j, possible_mask, nr_ids), >> + j < nr_ids;) { >> /* copy maps belonging to foreign traffic classes */ >> - for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) { >> + for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) { >> /* fill in the new device map from the old device >> map */ >> - map = xmap_dereference(dev_maps->cpu_map[tci]); >> - RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); >> + map = xmap_dereference(dev_maps->attr_map[tci]); >> + RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); >> } >> >> /* We need to explicitly update tci as prevous loop >> * could break out early if dev_maps is NULL. >> */ >> - tci = cpu * num_tc + tc; >> + tci = j * num_tc + tc; >> >> - if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) { >> - /* add queue to CPU maps */ >> + if (attr_test_mask(j, mask, nr_ids) && >> + attr_test_online(j, online_mask, nr_ids)) { >> + /* add tx-queue to CPU/rx-queue maps */ >> int pos = 0; >> >> - map = xmap_dereference(new_dev_maps->cpu_map[tci]); >> + map = xmap_dereference(new_dev_maps->attr_map[tci]); >> while ((pos < map->len) && (map->queues[pos] != >> index)) >> pos++; >> >> if (pos == map->len) >> map->queues[map->len++] = index; >> #ifdef CONFIG_NUMA >> - if (numa_node_id == -2) >> - numa_node_id = cpu_to_node(cpu); >> - else if (numa_node_id != cpu_to_node(cpu)) >> - numa_node_id = -1; >> + if (type == XPS_MAP_CPUS) { >> + if (numa_node_id == -2) >> + numa_node_id = cpu_to_node(j); >> + else if (numa_node_id != cpu_to_node(j)) >> + numa_node_id = -1; >> + } >> #endif >> } else if (dev_maps) { >> /* fill in the new device map from the old device >> map */ >> - map = xmap_dereference(dev_maps->cpu_map[tci]); >> - RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); >> + map = xmap_dereference(dev_maps->attr_map[tci]); >> + RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); >> } >> >> /* copy maps belonging to foreign traffic classes */ >> for (i = num_tc - tc, tci++; dev_maps && --i; tci++) { >> /* fill in the new device map from the old device >> map */ >> - map = xmap_dereference(dev_maps->cpu_map[tci]); >> - RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); >> + map = xmap_dereference(dev_maps->attr_map[tci]); >> + RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); >> } >> } >> >> - rcu_assign_pointer(dev->xps_maps, new_dev_maps); >> + if (type == XPS_MAP_RXQS) >> + rcu_assign_pointer(dev->xps_maps[XPS_MAP_RXQS], >> new_dev_maps); >> + else if (type == XPS_MAP_CPUS) >> + rcu_assign_pointer(dev->xps_maps[XPS_MAP_CPUS], >> new_dev_maps); >> >> /* Cleanup old maps */ >> if (!dev_maps) >> goto out_no_old_maps; >> >> - for_each_possible_cpu(cpu) { >> - for (i = num_tc, tci = cpu * num_tc; i--; tci++) { >> - new_map = >> xmap_dereference(new_dev_maps->cpu_map[tci]); >> - map = xmap_dereference(dev_maps->cpu_map[tci]); >> + for (j = -1; j = attrmask_next(j, possible_mask, nr_ids), >> + j < nr_ids;) { >> + for (i = num_tc, tci = j * num_tc; i--; tci++) { >> + new_map = >> xmap_dereference(new_dev_maps->attr_map[tci]); >> + map = xmap_dereference(dev_maps->attr_map[tci]); >> if (map && map != new_map) >> kfree_rcu(map, rcu); >> } >> @@ -2317,19 +2366,23 @@ int netif_set_xps_queue(struct net_device *dev, >> const struct cpumask *mask, >> active = true; >> >> out_no_new_maps: >> - /* update Tx queue numa node */ >> - netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index), >> - (numa_node_id >= 0) ? numa_node_id : >> - NUMA_NO_NODE); >> + if (type == XPS_MAP_CPUS) { >> + /* update Tx queue numa node */ >> + netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index), >> + (numa_node_id >= 0) ? >> + numa_node_id : NUMA_NO_NODE); >> + } >> >> if (!dev_maps) >> goto out_no_maps; >> >> - /* removes queue from unused CPUs */ >> - for_each_possible_cpu(cpu) { >> - for (i = tc, tci = cpu * num_tc; i--; tci++) >> + /* removes tx-queue from unused CPUs/rx-queues */ >> + for (j = -1; j = attrmask_next(j, possible_mask, nr_ids), >> + j < nr_ids;) { >> + for (i = tc, tci = j * num_tc; i--; tci++) >> active |= remove_xps_queue(dev_maps, tci, index); >> - if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu)) >> + if (!attr_test_mask(j, mask, nr_ids) || >> + !attr_test_online(j, online_mask, nr_ids)) >> active |= remove_xps_queue(dev_maps, tci, index); >> for (i = num_tc - tc, tci++; --i; tci++) >> active |= remove_xps_queue(dev_maps, tci, index); >> @@ -2337,7 +2390,10 @@ int netif_set_xps_queue(struct net_device *dev, const >> struct cpumask *mask, >> >> /* free map if not active */ >> if (!active) { >> - RCU_INIT_POINTER(dev->xps_maps, NULL); >> + if (type == XPS_MAP_RXQS) >> + RCU_INIT_POINTER(dev->xps_maps[XPS_MAP_RXQS], NULL); >> + else if (type == XPS_MAP_CPUS) >> + RCU_INIT_POINTER(dev->xps_maps[XPS_MAP_CPUS], NULL); >> kfree_rcu(dev_maps, rcu); >> } >> >> @@ -2347,11 +2403,12 @@ int netif_set_xps_queue(struct net_device *dev, >> const struct cpumask *mask, >> return 0; >> error: >> /* remove any maps that we added */ >> - for_each_possible_cpu(cpu) { >> - for (i = num_tc, tci = cpu * num_tc; i--; tci++) { >> - new_map = >> xmap_dereference(new_dev_maps->cpu_map[tci]); >> + for (j = -1; j = attrmask_next(j, possible_mask, nr_ids), >> + j < nr_ids;) { >> + for (i = num_tc, tci = j * num_tc; i--; tci++) { >> + new_map = >> xmap_dereference(new_dev_maps->attr_map[tci]); >> map = dev_maps ? >> - xmap_dereference(dev_maps->cpu_map[tci]) : >> + xmap_dereference(dev_maps->attr_map[tci]) : >> NULL; >> if (new_map && new_map != map) >> kfree(new_map); >> @@ -2363,6 +2420,13 @@ int netif_set_xps_queue(struct net_device *dev, const >> struct cpumask *mask, >> kfree(new_dev_maps); >> return -ENOMEM; >> } >> + >> +int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, >> + u16 index) >> +{ >> + return __netif_set_xps_queue(dev, cpumask_bits(mask), index, >> + XPS_MAP_CPUS); >> +} >> EXPORT_SYMBOL(netif_set_xps_queue); >> >> #endif >> @@ -3402,7 +3466,7 @@ static inline int get_xps_queue(struct net_device >> *dev, struct sk_buff *skb) >> int queue_index = -1; >> >> rcu_read_lock(); >> - dev_maps = rcu_dereference(dev->xps_maps); >> + dev_maps = rcu_dereference(dev->xps_maps[XPS_MAP_CPUS]); >> if (dev_maps) { >> unsigned int tci = skb->sender_cpu - 1; >> >> @@ -3411,7 +3475,7 @@ static inline int get_xps_queue(struct net_device >> *dev, struct sk_buff *skb) >> tci += netdev_get_prio_tc_map(dev, skb->priority); >> } >> >> - map = rcu_dereference(dev_maps->cpu_map[tci]); >> + map = rcu_dereference(dev_maps->attr_map[tci]); >> if (map) { >> if (map->len == 1) >> queue_index = map->queues[0]; >> diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c >> index c476f07..d7abd33 100644 >> --- a/net/core/net-sysfs.c >> +++ b/net/core/net-sysfs.c >> @@ -1227,13 +1227,13 @@ static ssize_t xps_cpus_show(struct netdev_queue >> *queue, >> } >> >> rcu_read_lock(); >> - dev_maps = rcu_dereference(dev->xps_maps); >> + dev_maps = rcu_dereference(dev->xps_maps[XPS_MAP_CPUS]); >> if (dev_maps) { >> for_each_possible_cpu(cpu) { >> int i, tci = cpu * num_tc + tc; >> struct xps_map *map; >> >> - map = rcu_dereference(dev_maps->cpu_map[tci]); >> + map = rcu_dereference(dev_maps->attr_map[tci]); >> if (!map) >> continue; >> >>