[PATCH v9,net-next,08/12] crypto: octeontx2: add LF framework
CPT RVU Local Functions(LFs) needs to be attached to the PF/VF to submit the instructions to CPT. This patch adds the interface to initialize and attach the LFs. It also adds interface to register the LF's interrupts. Signed-off-by: Suheil Chandran Signed-off-by: Lukasz Bartosik Signed-off-by: Srujana Challa --- drivers/crypto/marvell/octeontx2/Makefile | 2 +- .../marvell/octeontx2/otx2_cpt_common.h | 4 + .../marvell/octeontx2/otx2_cpt_mbox_common.c | 56 +++ drivers/crypto/marvell/octeontx2/otx2_cptlf.c | 429 ++ drivers/crypto/marvell/octeontx2/otx2_cptlf.h | 283 drivers/crypto/marvell/octeontx2/otx2_cptpf.h | 2 + .../marvell/octeontx2/otx2_cptpf_mbox.c | 8 + 7 files changed, 783 insertions(+), 1 deletion(-) create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cptlf.c create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cptlf.h diff --git a/drivers/crypto/marvell/octeontx2/Makefile b/drivers/crypto/marvell/octeontx2/Makefile index 3c4155446296..e47a55961bb8 100644 --- a/drivers/crypto/marvell/octeontx2/Makefile +++ b/drivers/crypto/marvell/octeontx2/Makefile @@ -2,6 +2,6 @@ obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += octeontx2-cpt.o octeontx2-cpt-objs := otx2_cptpf_main.o otx2_cptpf_mbox.o \ - otx2_cpt_mbox_common.o otx2_cptpf_ucode.o + otx2_cpt_mbox_common.o otx2_cptpf_ucode.o otx2_cptlf.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h index ae16dc102459..d5576f5d3b90 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h @@ -76,4 +76,8 @@ int otx2_cpt_read_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, u64 reg, u64 *val); int otx2_cpt_write_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, u64 reg, u64 val); +struct otx2_cptlfs_info; +int otx2_cpt_attach_rscrs_msg(struct otx2_cptlfs_info *lfs); +int otx2_cpt_detach_rsrcs_msg(struct otx2_cptlfs_info 
*lfs); + #endif /* __OTX2_CPT_COMMON_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c index ef1291c4881b..0933031ac827 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c @@ -2,6 +2,7 @@ /* Copyright (C) 2020 Marvell. */ #include "otx2_cpt_common.h" +#include "otx2_cptlf.h" int otx2_cpt_send_mbox_msg(struct otx2_mbox *mbox, struct pci_dev *pdev) { @@ -112,3 +113,58 @@ int otx2_cpt_write_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, return otx2_cpt_send_mbox_msg(mbox, pdev); } + +int otx2_cpt_attach_rscrs_msg(struct otx2_cptlfs_info *lfs) +{ + struct otx2_mbox *mbox = lfs->mbox; + struct rsrc_attach *req; + int ret; + + req = (struct rsrc_attach *) + otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req), + sizeof(struct msg_rsp)); + if (req == NULL) { + dev_err(&lfs->pdev->dev, "RVU MBOX failed to get message.\n"); + return -EFAULT; + } + + req->hdr.id = MBOX_MSG_ATTACH_RESOURCES; + req->hdr.sig = OTX2_MBOX_REQ_SIG; + req->hdr.pcifunc = 0; + req->cptlfs = lfs->lfs_num; + ret = otx2_cpt_send_mbox_msg(mbox, lfs->pdev); + if (ret) + return ret; + + if (!lfs->are_lfs_attached) + ret = -EINVAL; + + return ret; +} + +int otx2_cpt_detach_rsrcs_msg(struct otx2_cptlfs_info *lfs) +{ + struct otx2_mbox *mbox = lfs->mbox; + struct rsrc_detach *req; + int ret; + + req = (struct rsrc_detach *) + otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req), + sizeof(struct msg_rsp)); + if (req == NULL) { + dev_err(&lfs->pdev->dev, "RVU MBOX failed to get message.\n"); + return -EFAULT; + } + + req->hdr.id = MBOX_MSG_DETACH_RESOURCES; + req->hdr.sig = OTX2_MBOX_REQ_SIG; + req->hdr.pcifunc = 0; + ret = otx2_cpt_send_mbox_msg(mbox, lfs->pdev); + if (ret) + return ret; + + if (lfs->are_lfs_attached) + ret = -EINVAL; + + return ret; +} diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.c b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c 
new file mode 100644 index ..e27ea8909368 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Marvell. */ + +#include "otx2_cpt_common.h" +#include "otx2_cptlf.h" +#include "rvu_reg.h" + +#define CPT_TIMER_HOLD 0x03F +#define CPT_COUNT_HOLD 32 + +static void cptlf_do_set
[PATCH v9,net-next,11/12] crypto: octeontx2: add support to process the crypto request
Attach LFs to CPT VF to process the crypto requests and register LF interrupts. Signed-off-by: Suheil Chandran Signed-off-by: Lukasz Bartosik Signed-off-by: Srujana Challa --- drivers/crypto/marvell/octeontx2/Makefile | 2 +- .../marvell/octeontx2/otx2_cpt_common.h | 3 + .../marvell/octeontx2/otx2_cpt_reqmgr.h | 145 + drivers/crypto/marvell/octeontx2/otx2_cptlf.h | 7 + .../marvell/octeontx2/otx2_cptvf_main.c | 199 +++ .../marvell/octeontx2/otx2_cptvf_mbox.c | 26 + .../marvell/octeontx2/otx2_cptvf_reqmgr.c | 534 ++ 7 files changed, 915 insertions(+), 1 deletion(-) create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c diff --git a/drivers/crypto/marvell/octeontx2/Makefile b/drivers/crypto/marvell/octeontx2/Makefile index ef6fb2ab3571..41c0a5832b3f 100644 --- a/drivers/crypto/marvell/octeontx2/Makefile +++ b/drivers/crypto/marvell/octeontx2/Makefile @@ -4,6 +4,6 @@ obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += octeontx2-cpt.o octeontx2-cptvf.o octeontx2-cpt-objs := otx2_cptpf_main.o otx2_cptpf_mbox.o \ otx2_cpt_mbox_common.o otx2_cptpf_ucode.o otx2_cptlf.o octeontx2-cptvf-objs := otx2_cptvf_main.o otx2_cptvf_mbox.o otx2_cptlf.o \ - otx2_cpt_mbox_common.o + otx2_cpt_mbox_common.o otx2_cptvf_reqmgr.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h index ca220178e518..e41de466a3f7 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h @@ -17,6 +17,9 @@ #define OTX2_CPT_MAX_VFS_NUM 128 #define OTX2_CPT_RVU_FUNC_ADDR_S(blk, slot, offs) \ (((blk) << 20) | ((slot) << 12) | (offs)) +#define OTX2_CPT_RVU_PFFUNC(pf, func) \ + pf) & RVU_PFVF_PF_MASK) << RVU_PFVF_PF_SHIFT) | \ + (((func) & RVU_PFVF_FUNC_MASK) << RVU_PFVF_FUNC_SHIFT)) #define OTX2_CPT_INVALID_CRYPTO_ENG_GRP 0xFF #define OTX2_CPT_NAME_LENGTH 64 diff --git 
a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h index 9184f91c68c1..597a998c6df6 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h @@ -10,6 +10,22 @@ /* Completion code size and initial value */ #define OTX2_CPT_COMPLETION_CODE_SIZE 8 #define OTX2_CPT_COMPLETION_CODE_INIT OTX2_CPT_COMP_E_NOTDONE +/* + * Maximum total number of SG buffers is 100, we divide it equally + * between input and output + */ +#define OTX2_CPT_MAX_SG_IN_CNT 50 +#define OTX2_CPT_MAX_SG_OUT_CNT 50 + +/* DMA mode direct or SG */ +#define OTX2_CPT_DMA_MODE_DIRECT 0 +#define OTX2_CPT_DMA_MODE_SG 1 + +/* Context source CPTR or DPTR */ +#define OTX2_CPT_FROM_CPTR 0 +#define OTX2_CPT_FROM_DPTR 1 + +#define OTX2_CPT_MAX_REQ_SIZE 65535 union otx2_cpt_opcode { u16 flags; @@ -19,6 +35,13 @@ union otx2_cpt_opcode { } s; }; +struct otx2_cptvf_request { + u32 param1; + u32 param2; + u16 dlen; + union otx2_cpt_opcode opcode; +}; + /* * CPT_INST_S software command definitions * Words EI (0-3) @@ -48,4 +71,126 @@ struct otx2_cpt_iq_command { union otx2_cpt_iq_cmd_word3 cptr; }; +struct otx2_cpt_pending_entry { + void *completion_addr; /* Completion address */ + void *info; + /* Kernel async request callback */ + void (*callback)(int status, void *arg1, void *arg2); + struct crypto_async_request *areq; /* Async request callback arg */ + u8 resume_sender; /* Notify sender to resume sending requests */ + u8 busy;/* Entry status (free/busy) */ +}; + +struct otx2_cpt_pending_queue { + struct otx2_cpt_pending_entry *head; /* Head of the queue */ + u32 front; /* Process work from here */ + u32 rear; /* Append new work here */ + u32 pending_count; /* Pending requests count */ + u32 qlen; /* Queue length */ + spinlock_t lock;/* Queue lock */ +}; + +struct otx2_cpt_buf_ptr { + u8 *vptr; + dma_addr_t dma_addr; + u16 size; +}; + +union otx2_cpt_ctrl_info { + u32 flags; + struct { +#if 
defined(__BIG_ENDIAN_BITFIELD) + u32 reserved_6_31:26; + u32 grp:3; /* Group bits */ + u32 dma_mode:2; /* DMA mode */ + u32 se_req:1; /* To SE core */ +#else + u32 se_req:1; /* To SE core */ + u32 dma_mode:2; /* DMA mode */ + u32 grp:3; /* Group bits */ + u32 reserved_6_31:26; +#endif + } s; +}; + +struct otx2_cpt_req_info { + /* Kernel async request callback */ + void (*callback)(int status, void *arg1, void *arg2); + struct crypto_async_req
Re: [Linux-kernel-mentees] [PATCH net v2] Bluetooth: Fix slab-out-of-bounds read in hci_le_direct_adv_report_evt()
Hi Peilin, > `num_reports` is not being properly checked. A malformed event packet with > a large `num_reports` number makes hci_le_direct_adv_report_evt() read out > of bounds. Fix it. > > Cc: sta...@vger.kernel.org > Fixes: 2f010b55884e ("Bluetooth: Add support for handling LE Direct > Advertising Report events") > Reported-and-tested-by: syzbot+24ebd650e20bd263c...@syzkaller.appspotmail.com > Link: https://syzkaller.appspot.com/bug?extid=24ebd650e20bd263ca01 > Signed-off-by: Peilin Ye > --- > Change in v2: >- add "Cc: stable@" tag. > > net/bluetooth/hci_event.c | 12 +--- > 1 file changed, 5 insertions(+), 7 deletions(-) > > diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c > index 4b7fc430793c..aec43ae488d1 100644 > --- a/net/bluetooth/hci_event.c > +++ b/net/bluetooth/hci_event.c > @@ -5863,21 +5863,19 @@ static void hci_le_direct_adv_report_evt(struct > hci_dev *hdev, >struct sk_buff *skb) > { > u8 num_reports = skb->data[0]; > - void *ptr = &skb->data[1]; > + struct hci_ev_le_direct_adv_info *ev = (void *)&skb->data[1]; > > - hci_dev_lock(hdev); > + if (!num_reports || skb->len < num_reports * sizeof(*ev) + 1) > + return; > > - while (num_reports--) { > - struct hci_ev_le_direct_adv_info *ev = ptr; > + hci_dev_lock(hdev); > > + for (; num_reports; num_reports--, ev++) > process_adv_report(hdev, ev->evt_type, &ev->bdaddr, > ev->bdaddr_type, &ev->direct_addr, > ev->direct_addr_type, ev->rssi, NULL, 0, > false); > > - ptr += sizeof(*ev); > - } > - > hci_dev_unlock(hdev); > } patch has been applied to bluetooth-next tree. Regards Marcel
Re: [PATCH v7 1/5] Bluetooth: Interleave with allowlist scan
Hi Howard, > This patch implements the interleaving between allowlist scan and > no-filter scan. It'll be used to save power when at least one monitor is > registered and at least one pending connection or one device to be > scanned for. > > The durations of the allowlist scan and the no-filter scan are > controlled by MGMT command: Set Default System Configuration. The > default values are set randomly for now. > > Signed-off-by: Howard Chung > Reviewed-by: Alain Michaud > Reviewed-by: Manish Mandlik > --- > > Changes in v7: > - Fix bt_dev_warn argument type warning > > Changes in v6: > - Set parameter EnableAdvMonInterleaveScan to 1 byte long > > Changes in v5: > - Rename 'adv_monitor' from many functions/variables > - Move __hci_update_interleaved_scan into hci_req_add_le_passive_scan > - Update the logic of update_adv_monitor_scan_state > > Changes in v4: > - Rebase to bluetooth-next/master (previous 2 patches are applied) > - Fix over 80 chars limit in mgmt_config.c > - Set EnableAdvMonInterleaveScan default to Disable > > Changes in v3: > - Remove 'Bluez' prefix > > Changes in v2: > - remove 'case 0x001c' in mgmt_config.c > > include/net/bluetooth/hci_core.h | 10 +++ > net/bluetooth/hci_core.c | 4 + > net/bluetooth/hci_request.c | 136 +-- > net/bluetooth/mgmt_config.c | 10 +++ > 4 files changed, 153 insertions(+), 7 deletions(-) > > diff --git a/include/net/bluetooth/hci_core.h > b/include/net/bluetooth/hci_core.h > index 9873e1c8cd163..cfede18709d8f 100644 > --- a/include/net/bluetooth/hci_core.h > +++ b/include/net/bluetooth/hci_core.h > @@ -361,6 +361,8 @@ struct hci_dev { > __u8ssp_debug_mode; > __u8hw_error_code; > __u32 clock; > + __u16 advmon_allowlist_duration; > + __u16 advmon_no_filter_duration; > > __u16 devid_source; > __u16 devid_vendor; > @@ -542,6 +544,14 @@ struct hci_dev { > struct delayed_work rpa_expired; > bdaddr_trpa; > > + enum { > + INTERLEAVE_SCAN_NONE, > + INTERLEAVE_SCAN_NO_FILTER, > + INTERLEAVE_SCAN_ALLOWLIST > + } 
interleave_scan_state; > + > + struct delayed_work interleave_scan; > + > #if IS_ENABLED(CONFIG_BT_LEDS) > struct led_trigger *power_led; > #endif > diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c > index 502552d6e9aff..65b7b74baba4c 100644 > --- a/net/bluetooth/hci_core.c > +++ b/net/bluetooth/hci_core.c > @@ -3592,6 +3592,10 @@ struct hci_dev *hci_alloc_dev(void) > hdev->cur_adv_instance = 0x00; > hdev->adv_instance_timeout = 0; > > + /* The default values will be chosen in the future */ > + hdev->advmon_allowlist_duration = 300; > + hdev->advmon_no_filter_duration = 500; > + > hdev->sniff_max_interval = 800; > hdev->sniff_min_interval = 80; > > diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c > index 6f12bab4d2fa6..70ea126f56282 100644 > --- a/net/bluetooth/hci_request.c > +++ b/net/bluetooth/hci_request.c > @@ -378,6 +378,58 @@ void __hci_req_write_fast_connectable(struct hci_request > *req, bool enable) > hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); > } > > +static void start_interleave_scan(struct hci_dev *hdev) > +{ > + hdev->interleave_scan_state = INTERLEAVE_SCAN_NO_FILTER; > + queue_delayed_work(hdev->req_workqueue, > +&hdev->interleave_scan, 0); > +} > + > +static bool is_interleave_scanning(struct hci_dev *hdev) > +{ > + return hdev->interleave_scan_state != INTERLEAVE_SCAN_NONE; > +} > + > +static void cancel_interleave_scan(struct hci_dev *hdev) > +{ > + bt_dev_dbg(hdev, "%s cancelling interleave scan", hdev->name); > + > + cancel_delayed_work_sync(&hdev->interleave_scan); > + > + hdev->interleave_scan_state = INTERLEAVE_SCAN_NONE; > +} > + > +/* Return true if interleave_scan wasn't started until exiting this function, > + * otherwise, return false > + */ > +static bool __hci_update_interleaved_scan(struct hci_dev *hdev) > +{ > + if (hci_is_adv_monitoring(hdev) && > + !(list_empty(&hdev->pend_le_conns) && > + list_empty(&hdev->pend_le_reports))) { > + if (!is_interleave_scanning(hdev)) { This extra 
indentation is rather useless here. Just do another &&. > + /* If there is at least one ADV monitors and one pending > + * LE connection or one device to be scanned for, we > + * should alternate between allowlist scan and one > + * without any filters to save power. > + */ > + start_interleave_scan(hdev); > + bt_dev_dbg(hdev, "%s starting interleave scan", > +hdev->name); > + return true; > + }
Re: [PATCH v7 2/5] Bluetooth: Handle system suspend resume case
Hi Howard, > This patch adds code to handle the system suspension during interleave > scan. The interleave scan will be canceled when the system is going to > sleep, and will be restarted after waking up. > > Commit-changes 5: > - Remove the change in hci_req_config_le_suspend_scan this does not belong here. So please avoid this in the future. > Signed-off-by: Howard Chung > Reviewed-by: Alain Michaud > Reviewed-by: Manish Mandlik > Reviewed-by: Abhishek Pandit-Subedi > Reviewed-by: Miao-chen Chou > --- > > (no changes since v1) > > net/bluetooth/hci_request.c | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) Patch has been applied to bluetooth-next tree. Regards Marcel
Re: [PATCH v7 4/5] mgmt: Add supports of variable length parameter in mgmt_config
Hi Howard, > This adds support of variable length parameter in mgmt_config. I don’t see how this commit message describes the change correctly. > > Signed-off-by: Howard Chung > --- > > (no changes since v1) > > net/bluetooth/mgmt_config.c | 140 +--- > 1 file changed, 84 insertions(+), 56 deletions(-) > > diff --git a/net/bluetooth/mgmt_config.c b/net/bluetooth/mgmt_config.c > index 2d3ad288c78ac..b735e59c7fd51 100644 > --- a/net/bluetooth/mgmt_config.c > +++ b/net/bluetooth/mgmt_config.c > @@ -11,72 +11,100 @@ > #include "mgmt_util.h" > #include "mgmt_config.h" > > -#define HDEV_PARAM_U16(_param_code_, _param_name_) \ > -{ \ > - { cpu_to_le16(_param_code_), sizeof(__u16) }, \ > - { cpu_to_le16(hdev->_param_name_) } \ > -} > +#define HDEV_PARAM_U16(_param_name_) \ > + struct {\ > + struct mgmt_tlv entry; \ > + __le16 value; \ > + } __packed _param_name_ > > -#define HDEV_PARAM_U16_JIFFIES_TO_MSECS(_param_code_, _param_name_) \ > -{ \ > - { cpu_to_le16(_param_code_), sizeof(__u16) }, \ > - { cpu_to_le16(jiffies_to_msecs(hdev->_param_name_)) } \ > -} > +#define TLV_SET_U16(_param_code_, _param_name_) \ > + { \ > + { cpu_to_le16(_param_code_), sizeof(__u16) }, \ > + cpu_to_le16(hdev->_param_name_) \ > + } > + > +#define TLV_SET_U16_JIFFIES_TO_MSECS(_param_code_, _param_name_) \ > + { \ > + { cpu_to_le16(_param_code_), sizeof(__u16) }, \ > + cpu_to_le16(jiffies_to_msecs(hdev->_param_name_)) \ > + } > > int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data, > u16 data_len) > { > - struct { > - struct mgmt_tlv entry; > - union { > - /* This is a simplification for now since all values > - * are 16 bits. In the future, this code may need > - * refactoring to account for variable length values > - * and properly calculate the required buffer size. 
> - */ > - __le16 value; > - }; > - } __packed params[] = { > + int ret; > + struct mgmt_rp_read_def_system_config { > /* Please see mgmt-api.txt for documentation of these values */ > - HDEV_PARAM_U16(0x, def_page_scan_type), > - HDEV_PARAM_U16(0x0001, def_page_scan_int), > - HDEV_PARAM_U16(0x0002, def_page_scan_window), > - HDEV_PARAM_U16(0x0003, def_inq_scan_type), > - HDEV_PARAM_U16(0x0004, def_inq_scan_int), > - HDEV_PARAM_U16(0x0005, def_inq_scan_window), > - HDEV_PARAM_U16(0x0006, def_br_lsto), > - HDEV_PARAM_U16(0x0007, def_page_timeout), > - HDEV_PARAM_U16(0x0008, sniff_min_interval), > - HDEV_PARAM_U16(0x0009, sniff_max_interval), > - HDEV_PARAM_U16(0x000a, le_adv_min_interval), > - HDEV_PARAM_U16(0x000b, le_adv_max_interval), > - HDEV_PARAM_U16(0x000c, def_multi_adv_rotation_duration), > - HDEV_PARAM_U16(0x000d, le_scan_interval), > - HDEV_PARAM_U16(0x000e, le_scan_window), > - HDEV_PARAM_U16(0x000f, le_scan_int_suspend), > - HDEV_PARAM_U16(0x0010, le_scan_window_suspend), > - HDEV_PARAM_U16(0x0011, le_scan_int_discovery), > - HDEV_PARAM_U16(0x0012, le_scan_window_discovery), > - HDEV_PARAM_U16(0x0013, le_scan_int_adv_monitor), > - HDEV_PARAM_U16(0x0014, le_scan_window_adv_monitor), > - HDEV_PARAM_U16(0x0015, le_scan_int_connect), > - HDEV_PARAM_U16(0x0016, le_scan_window_connect), > - HDEV_PARAM_U16(0x0017, le_conn_min_interval), > - HDEV_PARAM_U16(0x0018, le_conn_max_interval), > - HDEV_PARAM_U16(0x0019, le_conn_latency), > - HDEV_PARAM_U16(0x001a, le_supv_timeout), > - HDEV_PARAM_U16_JIFFIES_TO_MSECS(0x001b, > - def_le_autoconnect_timeout), > - HDEV_PARAM_U16(0x001d, advmon_allowlist_duration), > - HDEV_PARAM_U16(0x001e, advmon_no_filter_duration), > + HDEV_PARAM_U16(def_page_scan_type); > + HDEV_PARAM_U16(def_page_scan_int); > + HDEV_PARAM_U16(def_page_scan_window); > + HDEV_PARAM_U16(def_inq_scan_type); > + HDEV_PARAM_U16(def_inq_scan_int); > + HDEV_PARAM_U16(def_inq_scan_window); > + HDEV_PARAM_U16(def_br_lsto); > + 
HDEV_PARAM_U16(def_page_timeout); > + HDEV_PARAM_U16(sniff_min_interval); > + HDEV_PARAM_U16(sniff_max_interval); > + HDEV_PARAM_U16(le_adv_min_interval); > + HDEV_PARAM_U16(le_adv_max_interval); > + HDEV_PARAM_U16(def_multi_adv_rotation_duration); > + HDEV_PARAM_U16(le_scan_interval); >
[PATCH ethtool 0/2] netlink: data lifetime error fixes
Fixes of two data lifetime bugs found by testing with valgrind: one use after free, one memory leak. Michal Kubecek (2): netlink: fix use after free in netlink_run_handler() netlink: fix leaked instances of struct nl_socket netlink/netlink.c | 21 +++-- netlink/nlsock.c | 3 +++ 2 files changed, 18 insertions(+), 6 deletions(-) -- 2.29.2
[PATCH ethtool 1/2] netlink: fix use after free in netlink_run_handler()
Valgrind detected use after free in netlink_run_handler(): some members of struct nl_context are accessed after the netlink context is freed by netlink_done(). Use local variables to store the two flags and check them instead. Fixes: 6c19c0d559c8 ("netlink: use genetlink ops information to decide about fallback") Signed-off-by: Michal Kubecek --- netlink/netlink.c | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/netlink/netlink.c b/netlink/netlink.c index f655f6ea25b7..bdd3048e 100644 --- a/netlink/netlink.c +++ b/netlink/netlink.c @@ -457,6 +457,7 @@ void netlink_run_handler(struct cmd_context *ctx, nl_func_t nlfunc, bool no_fallback) { bool wildcard = ctx->devname && !strcmp(ctx->devname, WILDCARD_DEVNAME); + bool wildcard_unsupported, ioctl_fallback; struct nl_context *nlctx; const char *reason; int ret; @@ -478,14 +479,17 @@ void netlink_run_handler(struct cmd_context *ctx, nl_func_t nlfunc, nlctx = ctx->nlctx; ret = nlfunc(ctx); + wildcard_unsupported = nlctx->wildcard_unsupported; + ioctl_fallback = nlctx->ioctl_fallback; netlink_done(ctx); - if (no_fallback || ret != -EOPNOTSUPP || !nlctx->ioctl_fallback) { - if (nlctx->wildcard_unsupported) + + if (no_fallback || ret != -EOPNOTSUPP || !ioctl_fallback) { + if (wildcard_unsupported) fprintf(stderr, "%s\n", "subcommand does not support wildcard dump"); exit(ret >= 0 ? ret : 1); } - if (nlctx->wildcard_unsupported) + if (wildcard_unsupported) reason = "subcommand does not support wildcard dump"; else reason = "kernel netlink support for subcommand missing"; -- 2.29.2
[PATCH ethtool 2/2] netlink: fix leaked instances of struct nl_socket
Valgrind detected memory leaks caused by missing cleanup of netlink context's ethnl_socket, ethnl2_socket and rtnl_socket. Also, contrary to its description, nlsock_done() does not free struct nl_socket itself. Fix nlsock_done() to free the structure and use it to dispose of sockets pointed to by struct nl_context members. Fixes: 50efb3cdd2bb ("netlink: netlink socket wrapper and helpers") Fixes: 87307c30724d ("netlink: initialize ethtool netlink socket") Fixes: 7f3585b22a4b ("netlink: add handler for permaddr (-P)") Signed-off-by: Michal Kubecek --- netlink/netlink.c | 11 --- netlink/nlsock.c | 3 +++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/netlink/netlink.c b/netlink/netlink.c index bdd3048e..ffe06339f099 100644 --- a/netlink/netlink.c +++ b/netlink/netlink.c @@ -435,11 +435,16 @@ out_free: static void netlink_done(struct cmd_context *ctx) { - if (!ctx->nlctx) + struct nl_context *nlctx = ctx->nlctx; + + if (!nlctx) return; - free(ctx->nlctx->ops_info); - free(ctx->nlctx); + nlsock_done(nlctx->ethnl_socket); + nlsock_done(nlctx->ethnl2_socket); + nlsock_done(nlctx->rtnl_socket); + free(nlctx->ops_info); + free(nlctx); ctx->nlctx = NULL; cleanup_all_strings(); } diff --git a/netlink/nlsock.c b/netlink/nlsock.c index ef31d8c33b29..0ec2738d81d2 100644 --- a/netlink/nlsock.c +++ b/netlink/nlsock.c @@ -395,8 +395,11 @@ out_msgbuff: */ void nlsock_done(struct nl_socket *nlsk) { + if (!nlsk) + return; if (nlsk->sk) mnl_socket_close(nlsk->sk); msgbuff_done(&nlsk->msgbuff); memset(nlsk, '\0', sizeof(*nlsk)); + free(nlsk); } -- 2.29.2
Re: [RFC PATCH net-next 3/3] net: dsa: listen for SWITCHDEV_{FDB,DEL}_ADD_TO_DEVICE on foreign bridge neighbors
On Mon, Nov 09, 2020 at 12:05:19PM +0100, Tobias Waldekranz wrote: > On Mon, Nov 09, 2020 at 12:03, Vladimir Oltean wrote: > > On Mon, Nov 09, 2020 at 09:09:37AM +0100, Tobias Waldekranz wrote: > >> one. But now you have also increased the background load of an already > >> choked resource, the MDIO bus. > > > > In practice, DSA switches are already very demanding of their management > > interface throughput, for PTP and things like that. I do expect that if > > you spent any significant amount of time with DSA, you already know the > > ins and outs of your MDIO/SPI/I2C controller and it would already be > > optimized for efficiency. But ok, we can add this to the list of cons. > > You are arguing for my position though, no? Yes it is demanding; that is > why we must allocate it carefully. Yes, if the change brings additional load to the MDIO/SPI/I2C link and doesn't bring any benefit, then it makes sense to skip it. > > So there you have it, it's not that bad. More work needs to be done, but > > IMO it's still workable. > > If you bypass learning on all frames sent from the CPU (as today), yes I > agree that you should be able to solve it with static entries. But I > think that you will have lots of weird problems with initial packet loss > as the FDB updates are not synchronous with the packet flow. I.e. the > bridge will tell DSA to update the entry, but the update in HW will > occur some time later when the workqueue actually performs the > operation. I don't know how bad this is in practice. It's surely better than waiting 5 minutes though. > > But now maybe it makes more sense to treat the switches that perform > > hardware SA learning on the CPU port separately, after I've digested > > this a bit. > > Yes, please. Because it will be impossible to add tx forward offloading > otherwise. 
Ok, so this change, when applied to mv88e6xxx, would preclude you from using FORWARD frames for your other application of that feature, unless you explicitly turn off SA learning for FORWARD frames coming from the CPU port, in which case you would still be ok. I need to sit on this for a while. How many DSA drivers do we have that don't do SA learning in hardware for CPU-injected packets? ocelot/felix and mv88e6xxx? Who else? Because if there aren't that many (or any at all except for these two), then I could try to spend some time and see how Felix behaves when I send FORWARD frames to it. Then we could go on full blast with the other alternative, to force-enable address learning from the CPU port, and declare this one as too complicated and not worth the effort.
Re: [PATCH] Bluetooth: Resume advertising after LE connection
Hi Daniel, > When an LE connection request is made, advertising is disabled and never > resumed. When a client has an active advertisement, this is disruptive. > This change adds resume logic for client-configured (non-directed) > advertisements after the connection attempt. > > The patch was tested by registering an advertisement, initiating an LE > connection from a remote peer, and verifying that the advertisement is > re-activated after the connection is established. This is performed on > Hatch and Kukui Chromebooks. > > Reviewed-by: Abhishek Pandit-Subedi > Signed-off-by: Daniel Winkler in the future, please make sure that the originator Signed-off-by comes first and the Reviewed-by lines after it > --- > > net/bluetooth/hci_conn.c| 12 ++-- > net/bluetooth/hci_request.c | 21 - > net/bluetooth/hci_request.h | 2 ++ > 3 files changed, 28 insertions(+), 7 deletions(-) Patch has been applied to bluetooth-next tree. Regards Marcel
Re: [RFC PATCH net-next 3/3] net: dsa: listen for SWITCHDEV_{FDB,DEL}_ADD_TO_DEVICE on foreign bridge neighbors
On Mon, Nov 09, 2020 at 02:31:11PM +0200, Vladimir Oltean wrote: > I need to sit on this for a while. How many DSA drivers do we have that > don't do SA learning in hardware for CPU-injected packets? ocelot/felix > and mv88e6xxx? Who else? Because if there aren't that many (or any at > all except for these two), then I could try to spend some time and see > how Felix behaves when I send FORWARD frames to it. Then we could go on > full blast with the other alternative, to force-enable address learning > from the CPU port, and declare this one as too complicated and not worth > the effort. In fact I'm not sure that I should be expecting an answer to this question. We can evaluate the other alternative in parallel. Would you be so kind as to send some sort of RFC for your TX-side offload_fwd_mark so that I could test with the hardware I have, and get a better understanding of the limitations there?
[PATCH][next] net: dsa: fix unintended sign extension on a u16 left shift
From: Colin Ian King The left shift of u16 variable high is promoted to the type int and then sign extended to a 64 bit u64 value. If the top bit of high is set then the upper 32 bits of the result end up being set by the sign extension. Fix this by explicitly casting the value in high to a u64 before left shifting by 16 places. Also, remove the initialisation of variable value to 0 at the start of each loop iteration as the value is never read and hence the assignment is redundant. Addresses-Coverity: ("Unintended sign extension") Fixes: e4b27ebc780f ("net: dsa: Add DSA driver for Hirschmann Hellcreek switches") Signed-off-by: Colin Ian King --- drivers/net/dsa/hirschmann/hellcreek.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index dfa66f7260d6..d42f40c76ba5 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -308,7 +308,7 @@ static void hellcreek_get_ethtool_stats(struct dsa_switch *ds, int port, const struct hellcreek_counter *counter = &hellcreek_counter[i]; u8 offset = counter->offset + port * 64; u16 high, low; - u64 value = 0; + u64 value; mutex_lock(&hellcreek->reg_lock); @@ -320,7 +320,7 @@ static void hellcreek_get_ethtool_stats(struct dsa_switch *ds, int port, */ high = hellcreek_read(hellcreek, HR_CRDH); low = hellcreek_read(hellcreek, HR_CRDL); - value = (high << 16) | low; + value = ((u64)high << 16) | low; hellcreek_port->counter_values[i] += value; data[i] = hellcreek_port->counter_values[i]; -- 2.28.0
[PATCH net-next 1/1] net: phy: Allow mdio buses to probe C45 before falling back to C22
This patch makes mdiobus_scan() try C45 first, as C45 can access all devices. This makes the function usable for PHYs that support both C45 and C22. Reviewed-by: Voon Weifeng Reviewed-by: Ong Boon Leong Signed-off-by: Wong Vee Khee --- drivers/net/phy/mdio_bus.c | 5 + include/linux/phy.h| 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 56094dd6bf26..372d0d088f7e 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -691,6 +691,11 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr) if (IS_ERR(phydev)) phydev = get_phy_device(bus, addr, true); break; + case MDIOBUS_C45_C22: + phydev = get_phy_device(bus, addr, true); + if (IS_ERR(phydev)) + phydev = get_phy_device(bus, addr, false); + break; } if (IS_ERR(phydev)) diff --git a/include/linux/phy.h b/include/linux/phy.h index 189bc9881ea6..73d9be2c00f4 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -360,6 +360,7 @@ struct mii_bus { MDIOBUS_C22, MDIOBUS_C45, MDIOBUS_C22_C45, + MDIOBUS_C45_C22, } probe_capabilities; /** @shared_lock: protect access to the shared element */ -- 2.17.0
Re: [PATCH stable] net: sch_generic: fix the missing new qdisc assignment bug
On Tue, Nov 03, 2020 at 11:25:38AM +0800, Yunsheng Lin wrote: > commit 2fb541c862c9 ("net: sch_generic: aviod concurrent reset and enqueue op > for lockless qdisc") > > When the above upstream commit is backported to stable kernel, > one assignment is missing, which causes two problems reported > by Joakim and Vishwanath, see [1] and [2]. > > So add the assignment back to fix it. > > 1. https://www.spinics.net/lists/netdev/msg693916.html > 2. https://www.spinics.net/lists/netdev/msg695131.html > > Fixes: 749cc0b0c7f3 ("net: sch_generic: aviod concurrent reset and enqueue op > for lockless qdisc") > Signed-off-by: Yunsheng Lin > --- > net/sched/sch_generic.c | 3 +++ > 1 file changed, 3 insertions(+) What kernel tree(s) does this need to be backported to? thanks, greg k-h
Re: [PATCH][next] net: dsa: fix unintended sign extension on a u16 left shift
On Mon Nov 09 2020, Colin King wrote: > From: Colin Ian King > > The left shift of u16 variable high is promoted to the type int and > then sign extended to a 64 bit u64 value. If the top bit of high is > set then the upper 32 bits of the result end up being set by the > sign extension. Fix this by explicitly casting the value in high to > a u64 before left shifting by 16 places. > > Also, remove the initialisation of variable value to 0 at the start > of each loop iteration as the value is never read and hence the > assignment it is redundant. > > Addresses-Coverity: ("Unintended sign extension") > Fixes: e4b27ebc780f ("net: dsa: Add DSA driver for Hirschmann Hellcreek > switches") > Signed-off-by: Colin Ian King > --- > drivers/net/dsa/hirschmann/hellcreek.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/net/dsa/hirschmann/hellcreek.c > b/drivers/net/dsa/hirschmann/hellcreek.c > index dfa66f7260d6..d42f40c76ba5 100644 > --- a/drivers/net/dsa/hirschmann/hellcreek.c > +++ b/drivers/net/dsa/hirschmann/hellcreek.c > @@ -308,7 +308,7 @@ static void hellcreek_get_ethtool_stats(struct dsa_switch > *ds, int port, > const struct hellcreek_counter *counter = &hellcreek_counter[i]; > u8 offset = counter->offset + port * 64; > u16 high, low; > - u64 value = 0; > + u64 value; > > mutex_lock(&hellcreek->reg_lock); > > @@ -320,7 +320,7 @@ static void hellcreek_get_ethtool_stats(struct dsa_switch > *ds, int port, >*/ > high = hellcreek_read(hellcreek, HR_CRDH); > low = hellcreek_read(hellcreek, HR_CRDL); > - value = (high << 16) | low; > + value = ((u64)high << 16) | low; Looks good to me. Thank you. Thanks, Kurt signature.asc Description: PGP signature
[PATCH][next] mptcp: fix a dereference of pointer before msk is null checked.
From: Colin Ian King Currently the assignment of pointer net from the sock_net(sk) call is potentially dereferencing a null pointer sk. sk points to the same location as pointer msk and msk is being null checked after the sock_net call. Fix this by calling sock_net after the null check on pointer msk. Addresses-Coverity: ("Dereference before null check") Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout") Signed-off-by: Colin Ian King --- net/mptcp/pm_netlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index ed60538df7b2..e76879ea5a30 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -206,13 +206,15 @@ static void mptcp_pm_add_timer(struct timer_list *timer) struct mptcp_pm_add_entry *entry = from_timer(entry, timer, add_timer); struct mptcp_sock *msk = entry->sock; struct sock *sk = (struct sock *)msk; - struct net *net = sock_net(sk); + struct net *net; pr_debug("msk=%p", msk); if (!msk) return; + net = sock_net(sk); + if (inet_sk_state_load(sk) == TCP_CLOSE) return; -- 2.28.0
[PATCH] net: tcp: ratelimit warnings in tcp_recvmsg
From: Menglong Dong 'before(*seq, TCP_SKB_CB(skb)->seq) == true' means that one or more skbs are lost somehow. Once this happens, it seems that it will never recover automatically. As a result, a warning will be printed and a '-EAGAIN' will be returned in non-block mode. In a typical situation, users call 'poll' on a socket and then receive skbs with 'recv' in non-block mode. This mode will make every arriving skb of the socket trigger a warning. Plenty of skbs will cause a high rate of kernel log messages. Besides, WARN is for indicating kernel bugs only and should not be user-triggerable. Replace it with 'net_warn_ratelimited' here. Signed-off-by: Menglong Dong --- net/ipv4/tcp.c | 17 ++--- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b2bc3d7fe9e8..5e38dfd03036 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2093,11 +2093,12 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, /* Now that we have two receive queues this * shouldn't happen. 
*/ - if (WARN(before(*seq, TCP_SKB_CB(skb)->seq), -"TCP recvmsg seq # bug: copied %X, seq %X, rcvnxt %X, fl %X\n", -*seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, -flags)) + if (unlikely(before(*seq, TCP_SKB_CB(skb)->seq))) { + net_warn_ratelimited("TCP recvmsg seq # bug: copied %X, seq %X, rcvnxt %X, fl %X\n", +*seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, +flags); break; + } offset = *seq - TCP_SKB_CB(skb)->seq; if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { @@ -2108,9 +2109,11 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, goto found_ok_skb; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto found_fin_ok; - WARN(!(flags & MSG_PEEK), -"TCP recvmsg seq # bug 2: copied %X, seq %X, rcvnxt %X, fl %X\n", -*seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags); + + if (!(flags & MSG_PEEK)) + net_warn_ratelimited("TCP recvmsg seq # bug 2: copied %X, seq %X, rcvnxt %X, fl %X\n", +*seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, +flags); } /* Well, if we have backlog, try to process it now yet. */ -- 2.25.1
Re: [PATCH V3] fsl/fman: add missing put_device() call in fman_port_probe()
在 2020/11/08 6:09, Jakub Kicinski 写道: On Sat, 7 Nov 2020 17:09:25 +0800 Yu Kuai wrote: if of_find_device_by_node() succeed, fman_port_probe() doesn't have a corresponding put_device(). Thus add jump target to fix the exception handling for this function implementation. Fixes: 0572054617f3 ("fsl/fman: fix dereference null return value") Signed-off-by: Yu Kuai @@ -1792,20 +1792,20 @@ static int fman_port_probe(struct platform_device *of_dev) if (!fm_node) { dev_err(port->dev, "%s: of_get_parent() failed\n", __func__); err = -ENODEV; - goto return_err; + goto free_port; And now you no longer put port_node if jumping from here... Sincerely apologize for that stupid mistake... Also does the reference to put_device() not have to be released when this function succeeds? I'm not sure about that, since fman_port_driver doesn't define other interface, maybe it reasonable to release it here. } @@ -1896,7 +1895,9 @@ static int fman_port_probe(struct platform_device *of_dev) return 0; -return_err: +put_device: + put_device(&fm_pdev->dev); +put_node: of_node_put(port_node); free_port: kfree(port); .
BUG: sleeping function called from invalid context in corrupted
Hello, syzbot found the following issue on: HEAD commit:bf3e7628 Merge branch 'mtd/fixes' of git://git.kernel.org/.. git tree: upstream console output: https://syzkaller.appspot.com/x/log.txt?x=16d76e2a50 kernel config: https://syzkaller.appspot.com/x/.config?x=e791ddf0875adf65 dashboard link: https://syzkaller.appspot.com/bug?extid=b7aeb9318541a1c709f1 compiler: clang version 11.0.0 (https://github.com/llvm/llvm-project.git ca2dcbd030eadbf0aa9b660efe864ff08af6e18b) syz repro: https://syzkaller.appspot.com/x/repro.syz?x=14df611a50 The issue was bisected to: commit dcd479e10a0510522a5d88b29b8f79ea3467d501 Author: Johannes Berg Date: Fri Oct 9 12:17:11 2020 + mac80211: always wind down STA state bisection log: https://syzkaller.appspot.com/x/bisect.txt?x=1218ff1450 final oops: https://syzkaller.appspot.com/x/report.txt?x=1118ff1450 console output: https://syzkaller.appspot.com/x/log.txt?x=1618ff1450 IMPORTANT: if you fix the issue, please add the following tag to the commit: Reported-by: syzbot+b7aeb9318541a1c70...@syzkaller.appspotmail.com Fixes: dcd479e10a05 ("mac80211: always wind down STA state") BUG: sleeping function called from invalid context at net/mac80211/sta_info.c:1962 in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 35, name: kworker/u4:2 4 locks held by kworker/u4:2/35: #0: 88802af11138 ((wq_completion)phy4){+.+.}-{0:0}, at: process_one_work+0x6f4/0xfc0 kernel/workqueue.c:2245 #1: c9e0fd80 ((work_completion)(&sdata->work)){+.+.}-{0:0}, at: process_one_work+0x733/0xfc0 kernel/workqueue.c:2247 #2: 88802f27cd00 (&wdev->mtx){+.+.}-{3:3}, at: sdata_lock net/mac80211/ieee80211_i.h:1021 [inline] #2: 88802f27cd00 (&wdev->mtx){+.+.}-{3:3}, at: ieee80211_ibss_work+0x4e/0x1450 net/mac80211/ibss.c:1683 --- This report is generated by a bot. It may contain errors. See https://goo.gl/tpsmEJ for more information about syzbot. syzbot engineers can be reached at syzkal...@googlegroups.com. syzbot will keep track of this issue. 
See: https://goo.gl/tpsmEJ#status for how to communicate with syzbot. For information about bisection process see: https://goo.gl/tpsmEJ#bisection syzbot can test patches for this issue, for details see: https://goo.gl/tpsmEJ#testing-patches
答复: [PATCH 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter
Hi > > On Mon, Nov 9, 2020 at 9:05 AM Zhang Qilong > wrote: > > > > In many case, we need to check return value of pm_runtime_get_sync, > > but it brings a trouble to the usage counter processing. Many callers > > forget to decrease the usage counter when it failed. It has been > > discussed a lot[0][1]. So we add a function to deal with the usage > > counter for better coding. > > > > [0]https://lkml.org/lkml/2020/6/14/88 > > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/202005200951 > > 48.10995-1-dinghao@zju.edu.cn/ > > Signed-off-by: Zhang Qilong > > --- > > include/linux/pm_runtime.h | 32 > > 1 file changed, 32 insertions(+) > > > > diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h > > index 4b708f4e8eed..2b0af5b1dffd 100644 > > --- a/include/linux/pm_runtime.h > > +++ b/include/linux/pm_runtime.h > > @@ -386,6 +386,38 @@ static inline int pm_runtime_get_sync(struct device > *dev) > > return __pm_runtime_resume(dev, RPM_GET_PUT); } > > > > +/** > > + * gene_pm_runtime_get_sync - Bump up usage counter of a device and > resume it. > > + * @dev: Target device. > > The force argument is not documented. (1) Good catch, I will add it in next version. > > > + * > > + * Increase runtime PM usage counter of @dev first, and carry out > > + runtime-resume > > + * of it synchronously. If __pm_runtime_resume return negative > > + value(device is in > > + * error state) or return positive value(the runtime of device is > > + already active) > > + * with force is true, it need decrease the usage counter of the > > + device when > > + * return. > > + * > > + * The possible return values of this function is zero or negative value. > > + * zero: > > + *- it means success and the status will store the resume operation > status > > + * if needed, the runtime PM usage counter of @dev remains > incremented. > > + * negative: > > + *- it means failure and the runtime PM usage counter of @dev has > been > > + * decreased. 
> > + * positive: > > + *- it means the runtime of the device is already active before that. > > If > > + * caller set force to true, we still need to decrease the usage > counter. > > Why is this needed? (2) If caller set force, it means caller will return even the device has already been active (__pm_runtime_resume return positive value) after calling gene_pm_runtime_get_sync, we still need to decrease the usage count. > > > + */ > > +static inline int gene_pm_runtime_get_sync(struct device *dev, bool > > +force) > > The name is not really a good one and note that pm_runtime_get() has the > same problem as _get_sync() (ie. the usage counter is incremented regardless > of the return value). > (3) I have not thought a good name now, if you have good ideas, welcome. Thanks, Zhang > > +{ > > + int ret = 0; > > + > > + ret = __pm_runtime_resume(dev, RPM_GET_PUT); > > + if (ret < 0 || (ret > 0 && force)) > > + pm_runtime_put_noidle(dev); > > + > > + return ret; > > +} > > + > > /** > > * pm_runtime_put - Drop device usage counter and queue up "idle check" > if 0. > > * @dev: Target device. > > -- > > Thanks!
Re: [PATCH v5 net-next 3/3] net/sched: act_frag: add implicit packet fragment support.
On Sun 08 Nov 2020 at 01:30, we...@ucloud.cn wrote: > From: wenxu > > Currently kernel tc subsystem can do conntrack in act_ct. But when several > fragment packets go through the act_ct, function tcf_ct_handle_fragments > will defrag the packets to a big one. But the last action will redirect > mirred to a device which maybe lead the reassembly big packet over the mtu > of target device. > > This patch add support for a xmit hook to mirred, that gets executed before > xmiting the packet. Then, when act_ct gets loaded, it configs that hook. > The frag xmit hook maybe reused by other modules. > > Signed-off-by: wenxu > --- > v2: Fix the crash for act_frag module without load > v3: modify the kconfig describe and put tcf_xmit_hook_is_enabled > in the tcf_dev_queue_xmit, and xchg atomic for tcf_xmit_hook > v4: using skb_protocol and fix line length exceeds 80 columns > v5: no change > > include/net/act_api.h | 16 + > net/sched/Kconfig | 13 > net/sched/Makefile | 1 + > net/sched/act_api.c| 51 +++ > net/sched/act_ct.c | 7 +++ > net/sched/act_frag.c | 164 > + > net/sched/act_mirred.c | 2 +- > 7 files changed, 253 insertions(+), 1 deletion(-) > create mode 100644 net/sched/act_frag.c > > diff --git a/include/net/act_api.h b/include/net/act_api.h > index 8721492..403a618 100644 > --- a/include/net/act_api.h > +++ b/include/net/act_api.h > @@ -239,6 +239,22 @@ int tcf_action_check_ctrlact(int action, struct > tcf_proto *tp, >struct netlink_ext_ack *newchain); > struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action, >struct tcf_chain *newchain); > + > +int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff > *skb)); > +int tcf_set_xmit_hook(int (*xmit_hook)(struct sk_buff *skb, > +int (*xmit)(struct sk_buff *skb))); > +void tcf_clear_xmit_hook(void); > + > +#if IS_ENABLED(CONFIG_NET_ACT_FRAG) > +int tcf_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff > *skb)); > +#else > +static inline int tcf_frag_xmit_hook(struct sk_buff 
*skb, > + int (*xmit)(struct sk_buff *skb)) > +{ > + return 0; > +} > +#endif > + > #endif /* CONFIG_NET_CLS_ACT */ > > static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, > diff --git a/net/sched/Kconfig b/net/sched/Kconfig > index a3b37d8..9a240c7 100644 > --- a/net/sched/Kconfig > +++ b/net/sched/Kconfig > @@ -974,9 +974,22 @@ config NET_ACT_TUNNEL_KEY > To compile this code as a module, choose M here: the > module will be called act_tunnel_key. > > +config NET_ACT_FRAG > + tristate "Packet fragmentation" > + depends on NET_CLS_ACT > + help > + Say Y here to allow fragmenting big packets when outputting > + with the mirred action. > + > + If unsure, say N. > + > + To compile this code as a module, choose M here: the > + module will be called act_frag. > + Just wondering, what is the motivation for putting the frag code into standalone module? It doesn't implement usual act_* interface and is not user-configurable. To me it looks like functionality that belongs to act_api. Am I missing something? > config NET_ACT_CT > tristate "connection tracking tc action" > depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE > + depends on NET_ACT_FRAG > help > Say Y here to allow sending the packets to conntrack module. 
> > diff --git a/net/sched/Makefile b/net/sched/Makefile > index 66bbf9a..c146186 100644 > --- a/net/sched/Makefile > +++ b/net/sched/Makefile > @@ -29,6 +29,7 @@ obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o > obj-$(CONFIG_NET_IFE_SKBPRIO)+= act_meta_skbprio.o > obj-$(CONFIG_NET_IFE_SKBTCINDEX) += act_meta_skbtcindex.o > obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o > +obj-$(CONFIG_NET_ACT_FRAG) += act_frag.o > obj-$(CONFIG_NET_ACT_CT) += act_ct.o > obj-$(CONFIG_NET_ACT_GATE) += act_gate.o > obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o > diff --git a/net/sched/act_api.c b/net/sched/act_api.c > index f66417d..e7b501c 100644 > --- a/net/sched/act_api.c > +++ b/net/sched/act_api.c > @@ -22,6 +22,57 @@ > #include > #include > > +static int (*tcf_xmit_hook)(struct sk_buff *skb, > + int (*xmit)(struct sk_buff *skb)); > +static DEFINE_STATIC_KEY_FALSE(tcf_xmit_hook_in_use); > + > +static void tcf_inc_xmit_hook(void) > +{ > + static_branch_inc(&tcf_xmit_hook_in_use); > +} > + > +static void tcf_dec_xmit_hook(void) > +{ > + static_branch_dec(&tcf_xmit_hook_in_use); > +} > + > +static bool tcf_xmit_hook_enabled(void) > +{ > + return static_branch_unlikely(&tcf_xmit_hook_in_use); > +} > + > +int tcf_set_xmit_hook(int (*xmit_hook)(struct sk_buff *skb, > +
[PATCH V2] memory: tegra: add missing put_device() call in error path of tegra_emc_probe()
The reference to device obtained with of_find_device_by_node() should be dropped. Thus add jump target to fix the exception handling for this function implementation. Fixes: 73a7f0a90641("memory: tegra: Add EMC (external memory controller) driver") Signed-off-by: Yu Kuai --- drivers/memory/tegra/tegra124-emc.c | 21 +-- .../net/ethernet/freescale/fman/fman_port.c | 3 +-- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/memory/tegra/tegra124-emc.c b/drivers/memory/tegra/tegra124-emc.c index 76ace42a688a..7d58a0e0a177 100644 --- a/drivers/memory/tegra/tegra124-emc.c +++ b/drivers/memory/tegra/tegra124-emc.c @@ -1207,8 +1207,10 @@ static int tegra_emc_probe(struct platform_device *pdev) return -ENOENT; emc->mc = platform_get_drvdata(mc); - if (!emc->mc) - return -EPROBE_DEFER; + if (!emc->mc) { + err = -EPROBE_DEFER; + goto put_device; + } ram_code = tegra_read_ram_code(); @@ -1217,25 +1219,27 @@ static int tegra_emc_probe(struct platform_device *pdev) dev_err(&pdev->dev, "no memory timings for RAM code %u found in DT\n", ram_code); - return -ENOENT; + err = -ENOENT; + goto put_device; } err = tegra_emc_load_timings_from_dt(emc, np); of_node_put(np); if (err) - return err; + goto put_device; if (emc->num_timings == 0) { dev_err(&pdev->dev, "no memory timings for RAM code %u registered\n", ram_code); - return -ENOENT; + err = -ENOENT; + goto put_device; } err = emc_init(emc); if (err) { dev_err(&pdev->dev, "EMC initialization failed: %d\n", err); - return err; + goto put_device; } platform_set_drvdata(pdev, emc); @@ -1244,6 +1248,11 @@ static int tegra_emc_probe(struct platform_device *pdev) emc_debugfs_init(&pdev->dev, emc); return 0; + +put_device: + put_device(&mc->dev); + + return err; }; static struct platform_driver tegra_emc_driver = { diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c index 9790e483241b..fcc59444df17 100644 --- a/drivers/net/ethernet/freescale/fman/fman_port.c +++ 
b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -1792,7 +1792,7 @@ static int fman_port_probe(struct platform_device *of_dev) if (!fm_node) { dev_err(port->dev, "%s: of_get_parent() failed\n", __func__); err = -ENODEV; - goto free_port; + goto put_node; } fm_pdev = of_find_device_by_node(fm_node); @@ -1899,7 +1899,6 @@ static int fman_port_probe(struct platform_device *of_dev) put_device(&fm_pdev->dev); put_node: of_node_put(port_node); -free_port: kfree(port); return err; } -- 2.25.4
Re: [PATCH][next] net: dsa: fix unintended sign extension on a u16 left shift
On Mon Nov 09 2020, Colin King wrote: > From: Colin Ian King > > The left shift of u16 variable high is promoted to the type int and > then sign extended to a 64 bit u64 value. If the top bit of high is > set then the upper 32 bits of the result end up being set by the > sign extension. Fix this by explicitly casting the value in high to > a u64 before left shifting by 16 places. > > Also, remove the initialisation of variable value to 0 at the start > of each loop iteration as the value is never read and hence the > assignment it is redundant. > > Addresses-Coverity: ("Unintended sign extension") > Fixes: e4b27ebc780f ("net: dsa: Add DSA driver for Hirschmann Hellcreek > switches") > Signed-off-by: Colin Ian King Reviewed-by: Kurt Kanzenbach signature.asc Description: PGP signature
Re: [PATCH 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter
On Mon, Nov 9, 2020 at 2:24 PM zhangqilong wrote: > > Hi > > > > On Mon, Nov 9, 2020 at 9:05 AM Zhang Qilong > > wrote: > > > > > > In many case, we need to check return value of pm_runtime_get_sync, > > > but it brings a trouble to the usage counter processing. Many callers > > > forget to decrease the usage counter when it failed. It has been > > > discussed a lot[0][1]. So we add a function to deal with the usage > > > counter for better coding. > > > > > > [0]https://lkml.org/lkml/2020/6/14/88 > > > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/202005200951 > > > 48.10995-1-dinghao@zju.edu.cn/ > > > Signed-off-by: Zhang Qilong > > > --- > > > include/linux/pm_runtime.h | 32 > > > 1 file changed, 32 insertions(+) > > > > > > diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h > > > index 4b708f4e8eed..2b0af5b1dffd 100644 > > > --- a/include/linux/pm_runtime.h > > > +++ b/include/linux/pm_runtime.h > > > @@ -386,6 +386,38 @@ static inline int pm_runtime_get_sync(struct device > > *dev) > > > return __pm_runtime_resume(dev, RPM_GET_PUT); } > > > > > > +/** > > > + * gene_pm_runtime_get_sync - Bump up usage counter of a device and > > resume it. > > > + * @dev: Target device. > > > > The force argument is not documented. > > (1) Good catch, I will add it in next version. > > > > > > + * > > > + * Increase runtime PM usage counter of @dev first, and carry out > > > + runtime-resume > > > + * of it synchronously. If __pm_runtime_resume return negative > > > + value(device is in > > > + * error state) or return positive value(the runtime of device is > > > + already active) > > > + * with force is true, it need decrease the usage counter of the > > > + device when > > > + * return. > > > + * > > > + * The possible return values of this function is zero or negative value. 
> > > + * zero: > > > + *- it means success and the status will store the resume operation > > status > > > + * if needed, the runtime PM usage counter of @dev remains > > incremented. > > > + * negative: > > > + *- it means failure and the runtime PM usage counter of @dev has > > been > > > + * decreased. > > > + * positive: > > > + *- it means the runtime of the device is already active before > > > that. If > > > + * caller set force to true, we still need to decrease the usage > > counter. > > > > Why is this needed? > > (2) If caller set force, it means caller will return even the device has > already been active > (__pm_runtime_resume return positive value) after calling > gene_pm_runtime_get_sync, > we still need to decrease the usage count. But who needs this? I don't think that it is a good idea to complicate the API this way.
[PATCH ethtool 2/2] ethtool: Improve compatibility between netlink and ioctl interfaces
From: Ido Schimmel With the ioctl interface, when autoneg is enabled, but without specifying speed, duplex or link modes, the advertised link modes are set to the supported link modes by the ethtool user space utility. This does not happen when using the netlink interface. Fix this incompatibility problem by having ethtool query the supported link modes from the kernel and advertise all the "real" ones when only "autoneg on" is specified. Before: Settings for eth0: Supported ports: [ TP ] Supported link modes: 10baseT/Half 10baseT/Full 100baseT/Half 100baseT/Full 1000baseT/Full Supported pause frame use: No Supports auto-negotiation: Yes Supported FEC modes: Not reported Advertised link modes: 100baseT/Half 100baseT/Full Advertised pause frame use: No Advertised auto-negotiation: Yes Advertised FEC modes: Not reported Speed: 1000Mb/s Duplex: Full Auto-negotiation: on Port: Twisted Pair PHYAD: 0 Transceiver: internal MDI-X: off (auto) Supports Wake-on: umbg Wake-on: d Current message level: 0x0007 (7) drv probe link Link detected: yes After: Settings for eth0: Supported ports: [ TP ] Supported link modes: 10baseT/Half 10baseT/Full 100baseT/Half 100baseT/Full 1000baseT/Full Supported pause frame use: No Supports auto-negotiation: Yes Supported FEC modes: Not reported Advertised link modes: 10baseT/Half 10baseT/Full 100baseT/Half 100baseT/Full 1000baseT/Full Advertised pause frame use: No Advertised auto-negotiation: Yes Advertised FEC modes: Not reported Speed: 1000Mb/s Duplex: Full Auto-negotiation: on Port: Twisted Pair PHYAD: 0 Transceiver: internal MDI-X: on (auto) Supports Wake-on: umbg Wake-on: d Current message level: 0x0007 (7) drv probe link Link detected: yes Signed-off-by: Ido Schimmel Signed-off-by: Michal Kubecek --- netlink/settings.c | 92 ++ 1 file changed, 92 insertions(+) diff --git a/netlink/settings.c b/netlink/settings.c index dc9280c114b5..90c28b1bc424 100644 --- a/netlink/settings.c +++ b/netlink/settings.c @@ -1115,6 +1115,93 @@ static const 
struct param_parser sset_params[] = { */ #define SSET_MAX_MSGS 4 +static int linkmodes_reply_advert_all_cb(const struct nlmsghdr *nlhdr, +void *data) +{ + const struct nlattr *tb[ETHTOOL_A_LINKMODES_MAX + 1] = {}; + DECLARE_ATTR_TB_INFO(tb); + struct nl_msg_buff *req_msgbuff = data; + const struct nlattr *ours_attr; + struct nlattr *req_bitset; + uint32_t *supported_modes; + unsigned int modes_count; + unsigned int i; + int ret; + + ret = mnl_attr_parse(nlhdr, GENL_HDRLEN, attr_cb, &tb_info); + if (ret < 0) + return MNL_CB_ERROR; + ours_attr = tb[ETHTOOL_A_LINKMODES_OURS]; + if (!ours_attr) + return MNL_CB_ERROR; + modes_count = bitset_get_count(tb[ETHTOOL_A_LINKMODES_OURS], &ret); + if (ret < 0) + return MNL_CB_ERROR; + supported_modes = get_compact_bitset_mask(tb[ETHTOOL_A_LINKMODES_OURS]); + if (!supported_modes) + return MNL_CB_ERROR; + + /* keep only "real" link modes */ + for (i = 0; i < modes_count; i++) + if (!lm_class_match(i, LM_CLASS_REAL)) + supported_modes[i / 32] &= ~((uint32_t)1 << (i % 32)); + + req_bitset = ethnla_nest_start(req_msgbuff, ETHTOOL_A_LINKMODES_OURS); + if (!req_bitset) + return MNL_CB_ERROR; + + if (ethnla_put_u32(req_msgbuff, ETHTOOL_A_BITSET_SIZE, modes_count) || + ethnla_put(req_msgbuff, ETHTOOL_A_BITSET_VALUE, + DIV_ROUND_UP(modes_count, 32) * sizeof(uint32_t), + supported_modes) || + ethnla_put(req_msgbuff, ETHTOOL_A_BITSET_MASK, + DIV_ROUND_UP(modes_count, 32) * sizeof(uint32_t), + supported_modes)) { + ethnla_nest_cancel(req_msgbuff, req_bitset); + return MNL_CB_ERROR; + } + + ethnla_nest_end(req_msgbuff, req_bitset); + return MNL_CB_OK; +} + +/* For compatibility reasons with ioctl-based ethtool, when "autoneg on" is + * specified without "advertise", "speed" and "duplex", we need to query the + * supported link modes from the kernel and advertise all the "real" ones. + */ +static int nl_sset_compat_linkmodes(struct nl_context *nlctx, +
[PATCH ethtool 1/2] netlink: do not send messages and process replies in nl_parser()
When called with group_style = PARSER_GROUP_MSG, nl_parser() not only parses the command line and composes the messages but also sends them to kernel and processes the replies. This is inconsistent with other modes and also impractical as it takes the control over the process from caller where it belongs. Modify nl_parser() to pass composed messages back to caller (which is only nl_sset() at the moment) and let it send requests and process replies. This will be needed for an upcoming backward compatibility patch which will need to inspect and possibly modify one of the composed messages. Signed-off-by: Michal Kubecek --- netlink/cable_test.c | 2 +- netlink/channels.c | 2 +- netlink/coalesce.c | 2 +- netlink/eee.c| 2 +- netlink/parser.c | 43 --- netlink/parser.h | 3 ++- netlink/pause.c | 2 +- netlink/rings.c | 2 +- netlink/settings.c | 35 ++- 9 files changed, 66 insertions(+), 27 deletions(-) diff --git a/netlink/cable_test.c b/netlink/cable_test.c index 8a7145324610..17139f7d297d 100644 --- a/netlink/cable_test.c +++ b/netlink/cable_test.c @@ -574,7 +574,7 @@ int nl_cable_test_tdr(struct cmd_context *ctx) ctx->devname, 0)) return -EMSGSIZE; - ret = nl_parser(nlctx, tdr_params, NULL, PARSER_GROUP_NEST); + ret = nl_parser(nlctx, tdr_params, NULL, PARSER_GROUP_NEST, NULL); if (ret < 0) return ret; diff --git a/netlink/channels.c b/netlink/channels.c index c6002ceeb121..894c74bcc11a 100644 --- a/netlink/channels.c +++ b/netlink/channels.c @@ -126,7 +126,7 @@ int nl_schannels(struct cmd_context *ctx) ctx->devname, 0)) return -EMSGSIZE; - ret = nl_parser(nlctx, schannels_params, NULL, PARSER_GROUP_NONE); + ret = nl_parser(nlctx, schannels_params, NULL, PARSER_GROUP_NONE, NULL); if (ret < 0) return 1; diff --git a/netlink/coalesce.c b/netlink/coalesce.c index 07a92d04b7a1..75922a91c2e7 100644 --- a/netlink/coalesce.c +++ b/netlink/coalesce.c @@ -254,7 +254,7 @@ int nl_scoalesce(struct cmd_context *ctx) ctx->devname, 0)) return -EMSGSIZE; - ret = nl_parser(nlctx, 
scoalesce_params, NULL, PARSER_GROUP_NONE); + ret = nl_parser(nlctx, scoalesce_params, NULL, PARSER_GROUP_NONE, NULL); if (ret < 0) return 1; diff --git a/netlink/eee.c b/netlink/eee.c index d3135b2094a4..04d8f0bbe3fc 100644 --- a/netlink/eee.c +++ b/netlink/eee.c @@ -174,7 +174,7 @@ int nl_seee(struct cmd_context *ctx) ctx->devname, 0)) return -EMSGSIZE; - ret = nl_parser(nlctx, seee_params, NULL, PARSER_GROUP_NONE); + ret = nl_parser(nlctx, seee_params, NULL, PARSER_GROUP_NONE, NULL); if (ret < 0) return 1; diff --git a/netlink/parser.c b/netlink/parser.c index 3b25f5d5a88e..c2eae93efb69 100644 --- a/netlink/parser.c +++ b/netlink/parser.c @@ -920,7 +920,7 @@ static void __parser_set(uint64_t *map, unsigned int idx) } struct tmp_buff { - struct nl_msg_buff msgbuff; + struct nl_msg_buff *msgbuff; unsigned intid; unsigned intorig_len; struct tmp_buff *next; @@ -951,7 +951,12 @@ static struct tmp_buff *tmp_buff_find_or_create(struct tmp_buff **phead, if (!new_buff) return NULL; new_buff->id = id; - msgbuff_init(&new_buff->msgbuff); + new_buff->msgbuff = malloc(sizeof(*new_buff->msgbuff)); + if (!new_buff->msgbuff) { + free(new_buff); + return NULL; + } + msgbuff_init(new_buff->msgbuff); new_buff->next = NULL; *pbuff = new_buff; @@ -965,7 +970,10 @@ static void tmp_buff_destroy(struct tmp_buff *head) while (buff) { next = buff->next; - msgbuff_done(&buff->msgbuff); + if (buff->msgbuff) { + msgbuff_done(buff->msgbuff); + free(buff->msgbuff); + } free(buff); buff = next; } @@ -980,13 +988,22 @@ static void tmp_buff_destroy(struct tmp_buff *head) * param_parser::offset) * @group_style: defines if identifiers in .group represent separate messages, * nested attributes or are not allowed + * @msgbuffs:(only used for @group_style = PARSER_GROUP_MSG) array to store + * pointers to composed messages; caller must make sure this + * array is sufficient, i.e. 
that it has at least as many entries + * as the number of different .group values in params array; + * entries are filled from the start, remaining entries are not + * modified; caller should zero initialize
[PATCH ethtool 0/2] netlink: improve compatibility with ioctl interface
Restore special behavior of "ethtool -s autoneg on" if no advertised modes, speed and duplex are requested: ioctl code enables all link modes supported by the device. This is most important for network devices which report no advertised modes when autonegotiation is disabled. First patch cleans up the parser interface; it allows nl_sset() to inspect the composed message and append an attribute to it if needed. Ido Schimmel (1): ethtool: Improve compatibility between netlink and ioctl interfaces Michal Kubecek (1): netlink: do not send messages and process replies in nl_parser() netlink/cable_test.c | 2 +- netlink/channels.c | 2 +- netlink/coalesce.c | 2 +- netlink/eee.c| 2 +- netlink/parser.c | 43 ++- netlink/parser.h | 3 +- netlink/pause.c | 2 +- netlink/rings.c | 2 +- netlink/settings.c | 127 +-- 9 files changed, 158 insertions(+), 27 deletions(-) -- 2.29.2
Re: [PATCH net-next 1/1] net: phy: Allow mdio buses to probe C45 before falling back to C22
On Mon, Nov 09, 2020 at 08:43:47PM +0800, Wong Vee Khee wrote: > This patch makes mdiobus_scan() to try on C45 first as C45 can access > all devices. This allows the function available for the PHY that > supports for both C45 and C22. > > Reviewed-by: Voon Weifeng > Reviewed-by: Ong Boon Leong > Signed-off-by: Wong Vee Khee Hi You need to add a user of this. And i would like to see a more detailed explanation of why it is needed. The PHY driver is free to do either C45 or C22 transfers. Why does it care how the device was found? Plus you can generally access C45 registers via the C45 over C22. If the PHY does not allow C45 over C22, then i expect the driver needs to be aware of if the PHY can be access either way, and it needs to do different things. And there is no PHY driver that i know of which does this. So before this goes any further, we need to see the bigger picture. Andrew
re: net: dsa: hellcreek: Add support for hardware timestamping
Hi Static analysis on linux-next with Coverity has detected a potential null pointer dereference issue on the following commit: commit f0d4ba9eff75a79fccb7793f4d9f12303d458603 Author: Kamil Alkhouri Date: Tue Nov 3 08:10:58 2020 +0100 net: dsa: hellcreek: Add support for hardware timestamping The analysis is as follows: 323 /* Get nanoseconds from ptp packet */ 324 type = SKB_PTP_TYPE(skb); 4. returned_null: ptp_parse_header returns NULL (checked 10 out of 12 times). 5. var_assigned: Assigning: hdr = NULL return value from ptp_parse_header. 325 hdr = ptp_parse_header(skb, type); Dereference null return value (NULL_RETURNS) 6. dereference: Dereferencing a pointer that might be NULL hdr when calling hellcreek_get_reserved_field. 326 ns = hellcreek_get_reserved_field(hdr); 327 hellcreek_clear_reserved_field(hdr); This issue can only occur if the type & PTP_CLASS_PMASK is not one of PTP_CLASS_IPV4, PTP_CLASS_IPV6 or PTP_CLASS_L2. I'm not sure if this is a possibility or not, but I'm assuming that it would be useful to perform the null check just in case, but I'm not sure how this affects the hw timestamping code in this function. Colin
Re: [PATCH v4 4/7] can: replace can_dlc as variable/element for payload length
On Mon. 9 Nov 2020 at 19:26, Oliver Hartkopp wrote: > diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h > index b2e8df8e4cb0..72671184a7a2 100644 > --- a/include/linux/can/dev.h > +++ b/include/linux/can/dev.h > @@ -183,12 +183,12 @@ static inline void can_set_static_ctrlmode(struct > net_device *dev, > /* override MTU which was set by default in can_setup()? */ > if (static_mode & CAN_CTRLMODE_FD) > dev->mtu = CANFD_MTU; > } > > -/* get data length from can_dlc with sanitized can_dlc */ > -u8 can_dlc2len(u8 can_dlc); > +/* get data length from raw data length code (DLC) */ /* * convert a given data length code (dlc) of an FD CAN frame into a * valid data length of max. 64 bytes. */ I missed this point during my previous review: the can_dlc2len() function is only valid for CAN FD frames. Comments should reflect this fact. > +u8 can_dlc2len(u8 dlc); Concerning the name: * can_get_cc_len() converts a Classical CAN frame DLC into a data length. * can_dlc2len() converts an FD CAN frame DLC into a data length. Just realized that both macro/function do similar things so we could think of a similar naming as well. * Example 1: can_get_cc_len() and can_get_fd_len() * Example 2: can_cc_dlc2len() and can_fd_dlc2len() Or we could simply leave things as they are, this is not a big issue as long as the comments clearly state which one is for classical frames and which one is for FD frames. > > /* map the sanitized data length to an appropriate data length code */ > u8 can_len2dlc(u8 len); can_len2dlc() might be renamed (e.g. can_get_fd_dlc()) if Example 1 solution is chosen. > struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int > echo_skb_max, Yours sincerely, Vincent Mailhol
Re: [PATCH 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter
On Mon, Nov 9, 2020 at 9:05 AM Zhang Qilong wrote: > > In many case, we need to check return value of pm_runtime_get_sync, but > it brings a trouble to the usage counter processing. Many callers forget > to decrease the usage counter when it failed. It has been discussed a > lot[0][1]. So we add a function to deal with the usage counter for better > coding. > > [0]https://lkml.org/lkml/2020/6/14/88 > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/20200520095148.10995-1-dinghao@zju.edu.cn/ > Signed-off-by: Zhang Qilong > --- > include/linux/pm_runtime.h | 32 > 1 file changed, 32 insertions(+) > > diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h > index 4b708f4e8eed..2b0af5b1dffd 100644 > --- a/include/linux/pm_runtime.h > +++ b/include/linux/pm_runtime.h > @@ -386,6 +386,38 @@ static inline int pm_runtime_get_sync(struct device *dev) > return __pm_runtime_resume(dev, RPM_GET_PUT); > } > > +/** > + * gene_pm_runtime_get_sync - Bump up usage counter of a device and resume > it. > + * @dev: Target device. The force argument is not documented. > + * > + * Increase runtime PM usage counter of @dev first, and carry out > runtime-resume > + * of it synchronously. If __pm_runtime_resume return negative value(device > is in > + * error state) or return positive value(the runtime of device is already > active) > + * with force is true, it need decrease the usage counter of the device when > + * return. > + * > + * The possible return values of this function is zero or negative value. > + * zero: > + *- it means success and the status will store the resume operation > status > + * if needed, the runtime PM usage counter of @dev remains incremented. > + * negative: > + *- it means failure and the runtime PM usage counter of @dev has been > + * decreased. > + * positive: > + *- it means the runtime of the device is already active before that. If > + * caller set force to true, we still need to decrease the usage > counter. Why is this needed? 
> + */ > +static inline int gene_pm_runtime_get_sync(struct device *dev, bool force) The name is not really a good one and note that pm_runtime_get() has the same problem as _get_sync() (ie. the usage counter is incremented regardless of the return value). > +{ > + int ret = 0; > + > + ret = __pm_runtime_resume(dev, RPM_GET_PUT); > + if (ret < 0 || (ret > 0 && force)) > + pm_runtime_put_noidle(dev); > + > + return ret; > +} > + > /** > * pm_runtime_put - Drop device usage counter and queue up "idle check" if 0. > * @dev: Target device. > -- Thanks!
答复: [PATCH 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter
Hi, > > On Mon, Nov 9, 2020 at 2:24 PM zhangqilong > wrote: > > > > Hi > > > > > > On Mon, Nov 9, 2020 at 9:05 AM Zhang Qilong > > > > > > wrote: > > > > > > > > In many case, we need to check return value of > > > > pm_runtime_get_sync, but it brings a trouble to the usage counter > > > > processing. Many callers forget to decrease the usage counter when > > > > it failed. It has been discussed a lot[0][1]. So we add a function > > > > to deal with the usage counter for better coding. > > > > > > > > [0]https://lkml.org/lkml/2020/6/14/88 > > > > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/20200520 > > > > 0951 48.10995-1-dinghao@zju.edu.cn/ > > > > Signed-off-by: Zhang Qilong > > > > --- > > > > include/linux/pm_runtime.h | 32 > > > > > 1 file changed, 32 insertions(+) > > > > > > > > diff --git a/include/linux/pm_runtime.h > > > > b/include/linux/pm_runtime.h index 4b708f4e8eed..2b0af5b1dffd > > > > 100644 > > > > --- a/include/linux/pm_runtime.h > > > > +++ b/include/linux/pm_runtime.h > > > > @@ -386,6 +386,38 @@ static inline int pm_runtime_get_sync(struct > > > > device > > > *dev) > > > > return __pm_runtime_resume(dev, RPM_GET_PUT); } > > > > > > > > +/** > > > > + * gene_pm_runtime_get_sync - Bump up usage counter of a device > > > > +and > > > resume it. > > > > + * @dev: Target device. > > > > > > The force argument is not documented. > > > > (1) Good catch, I will add it in next version. > > > > > > > > > + * > > > > + * Increase runtime PM usage counter of @dev first, and carry out > > > > + runtime-resume > > > > + * of it synchronously. If __pm_runtime_resume return negative > > > > + value(device is in > > > > + * error state) or return positive value(the runtime of device is > > > > + already active) > > > > + * with force is true, it need decrease the usage counter of the > > > > + device when > > > > + * return. > > > > + * > > > > + * The possible return values of this function is zero or negative > > > > value. 
> > > > + * zero: > > > > + *- it means success and the status will store the resume operation > > > status > > > > + * if needed, the runtime PM usage counter of @dev remains > > > incremented. > > > > + * negative: > > > > + *- it means failure and the runtime PM usage counter of @dev has > > > been > > > > + * decreased. > > > > + * positive: > > > > + *- it means the runtime of the device is already active before > > > > that. > If > > > > + * caller set force to true, we still need to decrease the usage > > > counter. > > > > > > Why is this needed? > > > > (2) If caller set force, it means caller will return even the device > > has already been active (__pm_runtime_resume return positive value) > > after calling gene_pm_runtime_get_sync, we still need to decrease the > usage count. > > But who needs this? > > I don't think that it is a good idea to complicate the API this way. The callers like: ret = pm_runtime_get_sync(dev); if (ret) { ... return (xxx); } drivers/spi/spi-img-spfi.c:734 img_spfi_resume() warn: pm_runtime_get_sync() also returns 1 on success drivers/mfd/arizona-core.c:49 arizona_clk32k_enable() warn: pm_runtime_get_sync() also returns 1 on success drivers/usb/dwc3/dwc3-pci.c:212 dwc3_pci_resume_work() warn: pm_runtime_get_sync() also returns 1 on success drivers/input/keyboard/omap4-keypad.c:279 omap4_keypad_probe() warn: pm_runtime_get_sync() also returns 1 on success drivers/gpu/drm/vc4/vc4_dsi.c:839 vc4_dsi_encoder_enable() warn: pm_runtime_get_sync() also returns 1 on success drivers/gpu/drm/i915/selftests/mock_gem_device.c:157 mock_gem_device() warn: 'pm_runtime_get_sync(&pdev->dev)' returns positive and negative drivers/watchdog/rti_wdt.c:230 rti_wdt_probe() warn: pm_runtime_get_sync() also returns 1 on success drivers/media/platform/exynos4-is/mipi-csis.c:513 s5pcsis_s_stream() warn: pm_runtime_get_sync() also returns 1 on success drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c:89 mtk_vcodec_dec_pw_on() warn: pm_runtime_get_sync() 
also returns 1 on success drivers/media/platform/ti-vpe/cal.c:794 cal_probe() warn: pm_runtime_get_sync() also returns 1 on success drivers/media/platform/ti-vpe/vpe.c:2478 vpe_runtime_get() warn: pm_runtime_get_sync() also returns 1 on success drivers/media/i2c/smiapp/smiapp-core.c:1529 smiapp_pm_get_init() warn: pm_runtime_get_sync() also returns 1 on success ... they need it to simplify the function. If we only want to simplify like ret = pm_runtime_get_sync(dev); if (ret < 0) { ... Return (xxx) } The parameter force could be removed. Thanks, Zhang
Re: [PATCH v2] Bluetooth: Move force_bredr_smp debugfs into hci_debugfs_create_bredr
Hi Claire, > Avoid multiple attempts to create the debugfs entry, force_bredr_smp, > by moving it from the SMP registration to the BR/EDR controller init > section. hci_debugfs_create_bredr is only called when HCI_SETUP and > HCI_CONFIG is not set. > > Signed-off-by: Claire Chang > --- > v2: correct a typo in commit message > > net/bluetooth/hci_debugfs.c | 50 + > net/bluetooth/smp.c | 44 ++-- > net/bluetooth/smp.h | 2 ++ > 3 files changed, 54 insertions(+), 42 deletions(-) patch has been applied to bluetooth-next tree. Regards Marcel
Re: [RFC PATCH net-next 3/3] net: dsa: listen for SWITCHDEV_{FDB,DEL}_ADD_TO_DEVICE on foreign bridge neighbors
On Mon Nov 9, 2020 at 3:38 PM CET, Vladimir Oltean wrote: > On Mon, Nov 09, 2020 at 02:31:11PM +0200, Vladimir Oltean wrote: > > I need to sit on this for a while. How many DSA drivers do we have that > > don't do SA learning in hardware for CPU-injected packets? ocelot/felix > > and mv88e6xxx? Who else? Because if there aren't that many (or any at > > all except for these two), then I could try to spend some time and see > > how Felix behaves when I send FORWARD frames to it. Then we could go on > > full blast with the other alternative, to force-enable address learning > > from the CPU port, and declare this one as too complicated and not worth > > the effort. > > In fact I'm not sure that I should be expecting an answer to this > question. We can evaluate the other alternative in parallel. Would you > be so kind to send some sort of RFC for your TX-side offload_fwd_mark so > that I could test with the hardware I have, and get a better > understanding > of the limitations there? That is the plan. I have some stuff I need to get done before though. The current implementation is on a 4.19 kernel, so it's going to take some time to rebase it.
re: net: dsa: hellcreek: Add support for hardware timestamping
Hi Colin, On Mon Nov 09 2020, Colin Ian King wrote: > Hi > > Static analysis on linux-next with Coverity has detected a potential > null pointer dereference issue on the following commit: > > commit f0d4ba9eff75a79fccb7793f4d9f12303d458603 > Author: Kamil Alkhouri > Date: Tue Nov 3 08:10:58 2020 +0100 > > net: dsa: hellcreek: Add support for hardware timestamping > > The analysis is as follows: > > 323/* Get nanoseconds from ptp packet */ > 324type = SKB_PTP_TYPE(skb); > >4. returned_null: ptp_parse_header returns NULL (checked 10 out of 12 > times). >5. var_assigned: Assigning: hdr = NULL return value from > ptp_parse_header. > > 325hdr = ptp_parse_header(skb, type); > >Dereference null return value (NULL_RETURNS) >6. dereference: Dereferencing a pointer that might be NULL hdr when > calling hellcreek_get_reserved_field. > > 326ns = hellcreek_get_reserved_field(hdr); > 327hellcreek_clear_reserved_field(hdr); > > This issue can only occur if the type & PTP_CLASS_PMASK is not one of > PTP_CLASS_IPV4, PTP_CLASS_IPV6 or PTP_CLASS_L2. I'm not sure if this is > a possibility or not, but I'm assuming that it would be useful to > perform the null check just in case, but I'm not sure how this affects > the hw timestamping code in this function. I don't see how the null pointer dereference could happen. That's the Rx path you showed above. The counter part code is: hellcreek_port_rxtstamp: /* Make sure the message is a PTP message that needs to be timestamped * and the interaction with the HW timestamping is enabled. If not, stop * here */ hdr = hellcreek_should_tstamp(hellcreek, port, skb, type); if (!hdr) return false; SKB_PTP_TYPE(skb) = type; Here the type is stored and hellcreek_should_tstamp() also calls ptp_parse_header() internally. Only when ptp_parse_header() didn't return NULL the first time the timestamping continues. It should be safe. Also the error handling would be interesting at that point. What should happen if the header is null then? 
Returning an invalid timestamp? Ignore it? Hm. I think we have to make sure that it is a valid ptp packet before reaching this code and that's what we've implemented. So, I guess it's OK. Thanks, Kurt signature.asc Description: PGP signature
[MPTCP][PATCH net 1/2] mptcp: fix static checker warnings in mptcp_pm_add_timer
Fix the following Smatch complaint: net/mptcp/pm_netlink.c:213 mptcp_pm_add_timer() warn: variable dereferenced before check 'msk' (see line 208) net/mptcp/pm_netlink.c 207 struct mptcp_sock *msk = entry->sock; 208 struct sock *sk = (struct sock *)msk; 209 struct net *net = sock_net(sk); ^^ "msk" dereferenced here. 210 211 pr_debug("msk=%p", msk); 212 213 if (!msk) Too late. 214 return; 215 Fixes: 93f323b9 ("mptcp: add a new sysctl add_addr_timeout") Reported-by: Dan Carpenter Signed-off-by: Geliang Tang Reviewed-by: Dan Carpenter --- net/mptcp/pm_netlink.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 6180a8b39a3f..03f2c28f11f5 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -206,7 +206,6 @@ static void mptcp_pm_add_timer(struct timer_list *timer) struct mptcp_pm_add_entry *entry = from_timer(entry, timer, add_timer); struct mptcp_sock *msk = entry->sock; struct sock *sk = (struct sock *)msk; - struct net *net = sock_net(sk); pr_debug("msk=%p", msk); @@ -235,7 +234,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer) if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) sk_reset_timer(sk, timer, - jiffies + mptcp_get_add_addr_timeout(net)); + jiffies + mptcp_get_add_addr_timeout(sock_net(sk))); spin_unlock_bh(&msk->pm.lock); -- 2.26.2
Re: [PATCH v2 net] ethtool: netlink: add missing netdev_features_change() call
On Sun, Nov 08, 2020 at 12:46:15AM +, Alexander Lobakin wrote: > After updating userspace Ethtool from 5.7 to 5.9, I noticed that > NETDEV_FEAT_CHANGE is no more raised when changing netdev features > through Ethtool. > That's because the old Ethtool ioctl interface always calls > netdev_features_change() at the end of user request processing to > inform the kernel that our netdevice has some features changed, but > the new Netlink interface does not. Instead, it just notifies itself > with ETHTOOL_MSG_FEATURES_NTF. > Replace this ethtool_notify() call with netdev_features_change(), so > the kernel will be aware of any features changes, just like in case > with the ioctl interface. This does not omit Ethtool notifications, > as Ethtool itself listens to NETDEV_FEAT_CHANGE and drops > ETHTOOL_MSG_FEATURES_NTF on it > (net/ethtool/netlink.c:ethnl_netdev_event()). > > From v1 [1]: > - dropped extra new line as advised by Jakub; > - no functional changes. > > [1] > https://lore.kernel.org/netdev/alzxq2o5uutvhcfngoiggj8vj3kgo5yiwanqjh0...@cp3-web-009.plabs.ch > > Fixes: 0980bfcd6954 ("ethtool: set netdev features with FEATURES_SET request") > Signed-off-by: Alexander Lobakin Reviewed-by: Michal Kubecek > --- > net/ethtool/features.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/net/ethtool/features.c b/net/ethtool/features.c > index 8ee4cdbd6b82..1c9f4df273bd 100644 > --- a/net/ethtool/features.c > +++ b/net/ethtool/features.c > @@ -280,7 +280,7 @@ int ethnl_set_features(struct sk_buff *skb, struct > genl_info *info) > active_diff_mask, compact); > } > if (mod) > - ethtool_notify(dev, ETHTOOL_MSG_FEATURES_NTF, NULL); > + netdev_features_change(dev); > > out_rtnl: > rtnl_unlock(); > -- > 2.29.2 > > signature.asc Description: PGP signature
[MPTCP][PATCH net 2/2] mptcp: cleanup for mptcp_pm_alloc_anno_list
This patch adds a NULL pointer check to mptcp_pm_alloc_anno_list, avoiding static checker warnings similar to those in mptcp_pm_add_timer. Signed-off-by: Geliang Tang Reviewed-by: Dan Carpenter --- net/mptcp/pm_netlink.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 03f2c28f11f5..dfc1bed4a55f 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -266,7 +266,9 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, { struct mptcp_pm_add_entry *add_entry = NULL; struct sock *sk = (struct sock *)msk; - struct net *net = sock_net(sk); + + if (!msk) + return false; if (lookup_anno_list_by_saddr(msk, &entry->addr)) return false; @@ -283,7 +285,7 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); sk_reset_timer(sk, &add_entry->add_timer, - jiffies + mptcp_get_add_addr_timeout(net)); + jiffies + mptcp_get_add_addr_timeout(sock_net(sk))); return true; } -- 2.26.2
[MPTCP][PATCH net 0/2] fix static checker warnings in
This patchset fixes static checker warnings in mptcp_pm_add_timer and mptcp_pm_alloc_anno_list. Geliang Tang (2): mptcp: fix static checker warnings in mptcp_pm_add_timer mptcp: cleanup for mptcp_pm_alloc_anno_list net/mptcp/pm_netlink.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) -- 2.26.2
[PATCH net] tipc: fix memory leak in tipc_topsrv_start()
kmemleak reports a memory leak as follows: unreferenced object 0x88810a596800 (size 512): comm "ip", pid 21558, jiffies 4297568990 (age 112.120s) hex dump (first 32 bytes): 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .N.. ff ff ff ff ff ff ff ff 00 83 60 b0 ff ff ff ff ..`. backtrace: [<22bbe21f>] tipc_topsrv_init_net+0x1f3/0xa70 [] ops_init+0xa8/0x3c0 [<138af6f2>] setup_net+0x2de/0x7e0 [<8c6807a3>] copy_net_ns+0x27d/0x530 [<6b21adbd>] create_new_namespaces+0x382/0xa30 [ ] unshare_nsproxy_namespaces+0xa1/0x1d0 [ ] ksys_unshare+0x39c/0x780 [<09ba3b19>] __x64_sys_unshare+0x2d/0x40 [<614ad866>] do_syscall_64+0x56/0xa0 [ ] entry_SYSCALL_64_after_hwframe+0x44/0xa9 'srv' is allocated in tipc_topsrv_start() but is not freed before returning in the error handling cases. We need to free it. Fixes: 5c45ab24ac77 ("tipc: make struct tipc_server private for server.c") Reported-by: Hulk Robot Signed-off-by: Wang Hai --- net/tipc/topsrv.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 5f6f86051c83..13f3143609f9 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -664,12 +664,18 @@ static int tipc_topsrv_start(struct net *net) ret = tipc_topsrv_work_start(srv); if (ret < 0) - return ret; + goto err_start; ret = tipc_topsrv_create_listener(srv); if (ret < 0) - tipc_topsrv_work_stop(srv); + goto err_create; + return 0; + +err_create: + tipc_topsrv_work_stop(srv); +err_start: + kfree(srv); return ret; } -- 2.17.1
Re: [PATCH 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter
On Mon, Nov 9, 2020 at 2:46 PM zhangqilong wrote: > > Hi, > > > > > On Mon, Nov 9, 2020 at 2:24 PM zhangqilong > > wrote: > > > > > > Hi > > > > > > > > On Mon, Nov 9, 2020 at 9:05 AM Zhang Qilong > > > > > > > > wrote: > > > > > > > > > > In many case, we need to check return value of > > > > > pm_runtime_get_sync, but it brings a trouble to the usage counter > > > > > processing. Many callers forget to decrease the usage counter when > > > > > it failed. It has been discussed a lot[0][1]. So we add a function > > > > > to deal with the usage counter for better coding. > > > > > > > > > > [0]https://lkml.org/lkml/2020/6/14/88 > > > > > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/20200520 > > > > > 0951 48.10995-1-dinghao@zju.edu.cn/ > > > > > Signed-off-by: Zhang Qilong > > > > > --- > > > > > include/linux/pm_runtime.h | 32 > > > > > > > 1 file changed, 32 insertions(+) > > > > > > > > > > diff --git a/include/linux/pm_runtime.h > > > > > b/include/linux/pm_runtime.h index 4b708f4e8eed..2b0af5b1dffd > > > > > 100644 > > > > > --- a/include/linux/pm_runtime.h > > > > > +++ b/include/linux/pm_runtime.h > > > > > @@ -386,6 +386,38 @@ static inline int pm_runtime_get_sync(struct > > > > > device > > > > *dev) > > > > > return __pm_runtime_resume(dev, RPM_GET_PUT); } > > > > > > > > > > +/** > > > > > + * gene_pm_runtime_get_sync - Bump up usage counter of a device > > > > > +and > > > > resume it. > > > > > + * @dev: Target device. > > > > > > > > The force argument is not documented. > > > > > > (1) Good catch, I will add it in next version. > > > > > > > > > > > > + * > > > > > + * Increase runtime PM usage counter of @dev first, and carry out > > > > > + runtime-resume > > > > > + * of it synchronously. 
If __pm_runtime_resume return negative > > > > > + value(device is in > > > > > + * error state) or return positive value(the runtime of device is > > > > > + already active) > > > > > + * with force is true, it need decrease the usage counter of the > > > > > + device when > > > > > + * return. > > > > > + * > > > > > + * The possible return values of this function is zero or negative > > > > > value. > > > > > + * zero: > > > > > + *- it means success and the status will store the resume > > > > > operation > > > > status > > > > > + * if needed, the runtime PM usage counter of @dev remains > > > > incremented. > > > > > + * negative: > > > > > + *- it means failure and the runtime PM usage counter of @dev has > > > > been > > > > > + * decreased. > > > > > + * positive: > > > > > + *- it means the runtime of the device is already active before > > > > > that. > > If > > > > > + * caller set force to true, we still need to decrease the usage > > > > counter. > > > > > > > > Why is this needed? > > > > > > (2) If caller set force, it means caller will return even the device > > > has already been active (__pm_runtime_resume return positive value) > > > after calling gene_pm_runtime_get_sync, we still need to decrease the > > usage count. > > > > But who needs this? > > > > I don't think that it is a good idea to complicate the API this way. > > The callers like: > ret = pm_runtime_get_sync(dev); > if (ret) { > ... > return (xxx); > } Which isn't correct really, is it? If ret is greater than 0, the error should not be returned in the first place, so you may want the new wrapper to return zero in that case instead. 
> drivers/spi/spi-img-spfi.c:734 img_spfi_resume() warn: pm_runtime_get_sync() > also returns 1 on success > drivers/mfd/arizona-core.c:49 arizona_clk32k_enable() warn: > pm_runtime_get_sync() also returns 1 on success > drivers/usb/dwc3/dwc3-pci.c:212 dwc3_pci_resume_work() warn: > pm_runtime_get_sync() also returns 1 on success > drivers/input/keyboard/omap4-keypad.c:279 omap4_keypad_probe() warn: > pm_runtime_get_sync() also returns 1 on success > drivers/gpu/drm/vc4/vc4_dsi.c:839 vc4_dsi_encoder_enable() warn: > pm_runtime_get_sync() also returns 1 on success > drivers/gpu/drm/i915/selftests/mock_gem_device.c:157 mock_gem_device() warn: > 'pm_runtime_get_sync(&pdev->dev)' returns positive and negative > drivers/watchdog/rti_wdt.c:230 rti_wdt_probe() warn: pm_runtime_get_sync() > also returns 1 on success > drivers/media/platform/exynos4-is/mipi-csis.c:513 s5pcsis_s_stream() warn: > pm_runtime_get_sync() also returns 1 on success > drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c:89 > mtk_vcodec_dec_pw_on() warn: pm_runtime_get_sync() also returns 1 on success > drivers/media/platform/ti-vpe/cal.c:794 cal_probe() warn: > pm_runtime_get_sync() also returns 1 on success > drivers/media/platform/ti-vpe/vpe.c:2478 vpe_runtime_get() warn: > pm_runtime_get_sync() also returns 1 on success > drivers/media/i2c/smiapp/smiapp-core.c:1529 smiapp_pm_get_init() warn: > pm_runtime_get_sync() also returns 1 on success > ... > they need it to simplify the function. > > If we only
Re: net: dsa: hellcreek: Add support for hardware timestamping
On 09/11/2020 13:59, Kurt Kanzenbach wrote: > Hi Colin, > > On Mon Nov 09 2020, Colin Ian King wrote: >> Hi >> >> Static analysis on linux-next with Coverity has detected a potential >> null pointer dereference issue on the following commit: >> >> commit f0d4ba9eff75a79fccb7793f4d9f12303d458603 >> Author: Kamil Alkhouri >> Date: Tue Nov 3 08:10:58 2020 +0100 >> >> net: dsa: hellcreek: Add support for hardware timestamping >> >> The analysis is as follows: >> >> 323/* Get nanoseconds from ptp packet */ >> 324type = SKB_PTP_TYPE(skb); >> >>4. returned_null: ptp_parse_header returns NULL (checked 10 out of 12 >> times). >>5. var_assigned: Assigning: hdr = NULL return value from >> ptp_parse_header. >> >> 325hdr = ptp_parse_header(skb, type); >> >>Dereference null return value (NULL_RETURNS) >>6. dereference: Dereferencing a pointer that might be NULL hdr when >> calling hellcreek_get_reserved_field. >> >> 326ns = hellcreek_get_reserved_field(hdr); >> 327hellcreek_clear_reserved_field(hdr); >> >> This issue can only occur if the type & PTP_CLASS_PMASK is not one of >> PTP_CLASS_IPV4, PTP_CLASS_IPV6 or PTP_CLASS_L2. I'm not sure if this is >> a possibility or not, but I'm assuming that it would be useful to >> perform the null check just in case, but I'm not sure how this affects >> the hw timestamping code in this function. > > I don't see how the null pointer dereference could happen. That's the > Rx path you showed above. > > The counter part code is: > > hellcreek_port_rxtstamp: > > /* Make sure the message is a PTP message that needs to be timestamped >* and the interaction with the HW timestamping is enabled. If not, stop >* here >*/ > hdr = hellcreek_should_tstamp(hellcreek, port, skb, type); > if (!hdr) > return false; > > SKB_PTP_TYPE(skb) = type; > > Here the type is stored and hellcreek_should_tstamp() also calls > ptp_parse_header() internally. Only when ptp_parse_header() didn't > return NULL the first time the timestamping continues. It should be > safe. 
> > Also the error handling would be interesting at that point. What should > happen if the header is null then? Returning an invalid timestamp? > Ignore it? > > Hm. I think we have to make sure that it is a valid ptp packet before > reaching this code and that's what we've implemented. So, I guess it's > OK. OK - thanks, I'll mark this as a false positive. > > Thanks, > Kurt >
答复: [PATCH 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter
> On Mon, Nov 9, 2020 at 2:46 PM zhangqilong > wrote: > > > > Hi, > > > > > > > > On Mon, Nov 9, 2020 at 2:24 PM zhangqilong > > > wrote: > > > > > > > > Hi > > > > > > > > > > On Mon, Nov 9, 2020 at 9:05 AM Zhang Qilong > > > > > > > > > > wrote: > > > > > > > > > > > > In many case, we need to check return value of > > > > > > pm_runtime_get_sync, but it brings a trouble to the usage > > > > > > counter processing. Many callers forget to decrease the usage > > > > > > counter when it failed. It has been discussed a lot[0][1]. So > > > > > > we add a function to deal with the usage counter for better coding. > > > > > > > > > > > > [0]https://lkml.org/lkml/2020/6/14/88 > > > > > > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/2020 > > > > > > 0520 > > > > > > 0951 48.10995-1-dinghao@zju.edu.cn/ > > > > > > Signed-off-by: Zhang Qilong > > > > > > --- > > > > > > include/linux/pm_runtime.h | 32 > > > > > > > > > 1 file changed, 32 insertions(+) > > > > > > > > > > > > diff --git a/include/linux/pm_runtime.h > > > > > > b/include/linux/pm_runtime.h index 4b708f4e8eed..2b0af5b1dffd > > > > > > 100644 > > > > > > --- a/include/linux/pm_runtime.h > > > > > > +++ b/include/linux/pm_runtime.h > > > > > > @@ -386,6 +386,38 @@ static inline int > > > > > > pm_runtime_get_sync(struct device > > > > > *dev) > > > > > > return __pm_runtime_resume(dev, RPM_GET_PUT); } > > > > > > > > > > > > +/** > > > > > > + * gene_pm_runtime_get_sync - Bump up usage counter of a > > > > > > +device and > > > > > resume it. > > > > > > + * @dev: Target device. > > > > > > > > > > The force argument is not documented. > > > > > > > > (1) Good catch, I will add it in next version. > > > > > > > > > > > > > > > + * > > > > > > + * Increase runtime PM usage counter of @dev first, and carry > > > > > > + out runtime-resume > > > > > > + * of it synchronously. 
If __pm_runtime_resume return > > > > > > + negative value(device is in > > > > > > + * error state) or return positive value(the runtime of > > > > > > + device is already active) > > > > > > + * with force is true, it need decrease the usage counter of > > > > > > + the device when > > > > > > + * return. > > > > > > + * > > > > > > + * The possible return values of this function is zero or negative > > > > > > value. > > > > > > + * zero: > > > > > > + *- it means success and the status will store the resume > operation > > > > > status > > > > > > + * if needed, the runtime PM usage counter of @dev remains > > > > > incremented. > > > > > > + * negative: > > > > > > + *- it means failure and the runtime PM usage counter of @dev > has > > > > > been > > > > > > + * decreased. > > > > > > + * positive: > > > > > > + *- it means the runtime of the device is already active before > that. > > > If > > > > > > + * caller set force to true, we still need to decrease the > > > > > > usage > > > > > counter. > > > > > > > > > > Why is this needed? > > > > > > > > (2) If caller set force, it means caller will return even the > > > > device has already been active (__pm_runtime_resume return > > > > positive value) after calling gene_pm_runtime_get_sync, we still > > > > need to decrease the > > > usage count. > > > > > > But who needs this? > > > > > > I don't think that it is a good idea to complicate the API this way. > > > > The callers like: > > ret = pm_runtime_get_sync(dev); > > if (ret) { > > ... > > return (xxx); > > } > > Which isn't correct really, is it? > > If ret is greater than 0, the error should not be returned in the first > place, so > you may want the new wrapper to return zero in that case instead. I get your idea. 
> > > drivers/spi/spi-img-spfi.c:734 img_spfi_resume() warn: > > pm_runtime_get_sync() also returns 1 on success > > drivers/mfd/arizona-core.c:49 arizona_clk32k_enable() warn: > > pm_runtime_get_sync() also returns 1 on success > > drivers/usb/dwc3/dwc3-pci.c:212 dwc3_pci_resume_work() warn: > > pm_runtime_get_sync() also returns 1 on success > > drivers/input/keyboard/omap4-keypad.c:279 omap4_keypad_probe() warn: > > pm_runtime_get_sync() also returns 1 on success > > drivers/gpu/drm/vc4/vc4_dsi.c:839 vc4_dsi_encoder_enable() warn: > > pm_runtime_get_sync() also returns 1 on success > > drivers/gpu/drm/i915/selftests/mock_gem_device.c:157 mock_gem_device() > > warn: 'pm_runtime_get_sync(&pdev->dev)' returns positive and negative > > drivers/watchdog/rti_wdt.c:230 rti_wdt_probe() warn: > > pm_runtime_get_sync() also returns 1 on success > > drivers/media/platform/exynos4-is/mipi-csis.c:513 s5pcsis_s_stream() > > warn: pm_runtime_get_sync() also returns 1 on success > > drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c:89 > > mtk_vcodec_dec_pw_on() warn: pm_runtime_get_sync() also returns 1 on > > success > > drivers/media/platform/ti-vpe/cal.c:794 cal_probe() warn: > > pm_runtime_get_sync() also returns 1 on success > > drivers/media/platfor
RE: [EXTERNAL] Re: [PATCH bpf-next v2] Update perf ring buffer to prevent corruption
> -Original Message- > From: Peter Zijlstra > Sent: 09 November 2020 11:29 > To: Alexei Starovoitov > Cc: Kevin Sheldrake ; Ingo Molnar > ; Daniel Borkmann ; Network > Development ; b...@vger.kernel.org; Andrii > Nakryiko ; KP Singh > Subject: [EXTERNAL] Re: [PATCH bpf-next v2] Update perf ring buffer to > prevent corruption > > On Thu, Nov 05, 2020 at 08:19:47PM -0800, Alexei Starovoitov wrote: > > On Thu, Nov 5, 2020 at 7:18 AM Kevin Sheldrake > > wrote: > > > > > > Resent due to some failure at my end. Apologies if it arrives twice. > > > > > > From 63e34d4106b4dd767f9bfce951f8a35f14b52072 Mon Sep 17 00:00:00 > 2001 > > > From: Kevin Sheldrake > > > Date: Thu, 5 Nov 2020 12:18:53 + > > > Subject: [PATCH] Update perf ring buffer to prevent corruption from > > > bpf_perf_output_event() > > > > > > The bpf_perf_output_event() helper takes a sample size parameter of > u64, but > > > the underlying perf ring buffer uses a u16 internally. This 64KB maximum > size > > > has to also accommodate a variable sized header. Failure to observe this > > > restriction can result in corruption of the perf ring buffer as samples > > > overlap. > > > > > > Track the sample size and return -E2BIG if too big to fit into the u16 > > > size parameter. > > > > > > Signed-off-by: Kevin Sheldrake > > > > The fix makes sense to me. > > Peter, Ingo, > > should I take it through the bpf tree or you want to route via tip? > > What are you doing to trigger this? The Changelog is devoid of much > useful information? Hello I triggered the corruption by sending samples larger than 64KB-24 bytes to a perf ring buffer from eBPF using bpf_perf_event_output(). The u16 that holds the size in the struct perf_event_header is overflowed and the distance between adjacent samples in the perf ring buffer is set by this overflowed value; hence if samples of 64KB are sent, adjacent samples are placed 24 bytes apart in the ring buffer, with the later ones overwriting parts of the earlier ones. 
If samples aren't read as quickly as they are received, then they are corrupted by the time they are read. Attempts to fix this in the eBPF verifier failed as the actual sample is constructed from a variable sized header in addition to the raw data supplied from eBPF. The sample is constructed in perf_prepare_sample(), outside of the eBPF engine. My proposed fix is to check that the constructed size fits into the u16 size field (i.e. is below U16_MAX) and to return -E2BIG if it does not. A proof of concept is available at: https://github.com/microsoft/OMS-Auditd-Plugin/tree/MSTIC-Research/ebpf_perf_output_poc Thanks Kevin Sheldrake
Re: [PATCH] page_frag: Recover from memory pressure
On Thu, Nov 05, 2020 at 02:02:24PM +, Matthew Wilcox wrote: > On Thu, Nov 05, 2020 at 02:21:25PM +0100, Eric Dumazet wrote: > > On 11/5/20 5:21 AM, Matthew Wilcox (Oracle) wrote: > > > When the machine is under extreme memory pressure, the page_frag allocator > > > signals this to the networking stack by marking allocations with the > > > 'pfmemalloc' flag, which causes non-essential packets to be dropped. > > > Unfortunately, even after the machine recovers from the low memory > > > condition, the page continues to be used by the page_frag allocator, > > > so all allocations from this page will continue to be dropped. > > > > > > Fix this by freeing and re-allocating the page instead of recycling it. > > > > > > Reported-by: Dongli Zhang > > > Cc: Aruna Ramakrishna > > > Cc: Bert Barbe > > > Cc: Rama Nichanamatlu > > > Cc: Venkat Venkatsubra > > > Cc: Manjunath Patil > > > Cc: Joe Jin > > > Cc: SRINIVAS > > > Cc: sta...@vger.kernel.org > > > Fixes: 79930f5892e ("net: do not deplete pfmemalloc reserve") > > > > Your patch looks fine, although this Fixes: tag seems incorrect. > > > > 79930f5892e ("net: do not deplete pfmemalloc reserve") was propagating > > the page pfmemalloc status into the skb, and seems correct to me. > > > > The bug was the page_frag_alloc() was keeping a problematic page for > > an arbitrary period of time ? > > Isn't this the commit which unmasks the problem, though? I don't think > it's the buggy commit, but if your tree doesn't have 79930f5892e, then > you don't need this patch. > > Or are you saying the problem dates back all the way to > c93bdd0e03e8 ("netvm: allow skb allocation to use PFMEMALLOC reserves") > > > > + if (nc->pfmemalloc) { > > > > if (unlikely(nc->pfmemalloc)) { > > ACK. Will make the change once we've settled on an appropriate Fixes tag. Which commit should I claim this fixes?
Re: [PATCH] page_frag: Recover from memory pressure
On 11/9/20 3:32 PM, Matthew Wilcox wrote: > On Thu, Nov 05, 2020 at 02:02:24PM +, Matthew Wilcox wrote: >> On Thu, Nov 05, 2020 at 02:21:25PM +0100, Eric Dumazet wrote: >>> On 11/5/20 5:21 AM, Matthew Wilcox (Oracle) wrote: When the machine is under extreme memory pressure, the page_frag allocator signals this to the networking stack by marking allocations with the 'pfmemalloc' flag, which causes non-essential packets to be dropped. Unfortunately, even after the machine recovers from the low memory condition, the page continues to be used by the page_frag allocator, so all allocations from this page will continue to be dropped. Fix this by freeing and re-allocating the page instead of recycling it. Reported-by: Dongli Zhang Cc: Aruna Ramakrishna Cc: Bert Barbe Cc: Rama Nichanamatlu Cc: Venkat Venkatsubra Cc: Manjunath Patil Cc: Joe Jin Cc: SRINIVAS Cc: sta...@vger.kernel.org Fixes: 79930f5892e ("net: do not deplete pfmemalloc reserve") >>> >>> Your patch looks fine, although this Fixes: tag seems incorrect. >>> >>> 79930f5892e ("net: do not deplete pfmemalloc reserve") was propagating >>> the page pfmemalloc status into the skb, and seems correct to me. >>> >>> The bug was the page_frag_alloc() was keeping a problematic page for >>> an arbitrary period of time ? >> >> Isn't this the commit which unmasks the problem, though? I don't think >> it's the buggy commit, but if your tree doesn't have 79930f5892e, then >> you don't need this patch. >> >> Or are you saying the problem dates back all the way to >> c93bdd0e03e8 ("netvm: allow skb allocation to use PFMEMALLOC reserves") >> + if (nc->pfmemalloc) { >>> >>> if (unlikely(nc->pfmemalloc)) { >> >> ACK. Will make the change once we've settled on an appropriate Fixes tag. > > Which commit should I claim this fixes? Hmm, no big deal, lets not waste time on tracking precise bug origin.
Re: [PATCH net] net: udp: fix Fast/frag0 UDP GRO
On Sat, Nov 7, 2020 at 8:11 PM Alexander Lobakin wrote: > > While testing UDP GSO fraglists forwarding through driver that uses > Fast GRO (via napi_gro_frags()), I was observing lots of out-of-order > iperf packets: > > [ ID] Interval Transfer Bitrate Jitter > [SUM] 0.0-40.0 sec 12106 datagrams received out-of-order > > Simple switch to napi_gro_receive() any other method without frag0 > shortcut completely resolved them. > > I've found that UDP GRO uses udp_hdr(skb) in its .gro_receive() > callback. While it's probably OK for non-frag0 paths (when all > headers or even the entire frame are already in skb->data), this > inline points to junk when using Fast GRO (napi_gro_frags() or > napi_gro_receive() with only Ethernet header in skb->data and all > the rest in shinfo->frags) and breaks GRO packet compilation and > the packet flow itself. > To support both modes, skb_gro_header_fast() + skb_gro_header_slow() > are typically used. UDP even has an inline helper that makes use of > them, udp_gro_udphdr(). Use that instead of troublemaking udp_hdr() > to get rid of the out-of-order delivers. > > Present since the introduction of plain UDP GRO in 5.0-rc1. > > Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.") > Signed-off-by: Alexander Lobakin > --- > net/ipv4/udp_offload.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c > index e67a66fbf27b..13740e9fe6ec 100644 > --- a/net/ipv4/udp_offload.c > +++ b/net/ipv4/udp_offload.c > @@ -366,7 +366,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff > *skb, > static struct sk_buff *udp_gro_receive_segment(struct list_head *head, >struct sk_buff *skb) > { > - struct udphdr *uh = udp_hdr(skb); > + struct udphdr *uh = udp_gro_udphdr(skb); > struct sk_buff *pp = NULL; > struct udphdr *uh2; > struct sk_buff *p; Good catch. skb_gro_header_slow may fail and return NULL. 
Need to check that before dereferencing uh below in /* requires non zero csum, for symmetry with GSO */ if (!uh->check) { NAPI_GRO_CB(skb)->flush = 1; return NULL; }
[PATCH] net: dsa: mv88e6xxx: Fix memleak in mv88e6xxx_region_atu_snapshot
When mv88e6xxx_fid_map return error, we lost free the table. Fix it. Fixes: bfb255428966 ("net: dsa: mv88e6xxx: Add devlink regions") Reported-by: Hulk Robot Signed-off-by: zhangxiaoxu --- drivers/net/dsa/mv88e6xxx/devlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/devlink.c b/drivers/net/dsa/mv88e6xxx/devlink.c index 10cd1bfd81a0..ade04c036fd9 100644 --- a/drivers/net/dsa/mv88e6xxx/devlink.c +++ b/drivers/net/dsa/mv88e6xxx/devlink.c @@ -393,8 +393,10 @@ static int mv88e6xxx_region_atu_snapshot(struct devlink *dl, mv88e6xxx_reg_lock(chip); err = mv88e6xxx_fid_map(chip, fid_bitmap); - if (err) + if (err) { + kfree(table); goto out; + } while (1) { fid = find_next_bit(fid_bitmap, MV88E6XXX_N_FID, fid + 1); -- 2.25.4
Re: [PATCH] net: dsa: mv88e6xxx: Fix memleak in mv88e6xxx_region_atu_snapshot
On Mon, Nov 09, 2020 at 09:44:16AM -0500, zhangxiaoxu wrote: > When mv88e6xxx_fid_map return error, we lost free the table. > > Fix it. > > Fixes: bfb255428966 ("net: dsa: mv88e6xxx: Add devlink regions") > Reported-by: Hulk Robot > Signed-off-by: zhangxiaoxu Reviewed-by: Andrew Lunn Andrew
Re: [PATCH] net: tcp: ratelimit warnings in tcp_recvmsg
On Mon, Nov 9, 2020 at 9:36 PM Eric Dumazet wrote: > > I do not think this patch is useful. That is simply code churn. > > Can you trigger the WARN() in the latest upstream version ? > If yes this is a serious bug that needs urgent attention. > > Make sure you have backported all needed fixes into your kernel, if > you get this warning on a non pristine kernel. Theoretically, this WARN() shouldn't be triggered on any branch. Somehow, it just happened on kernel v3.10, which really confused me. I wasn't able to keep tracing it, as it is a production environment. I notice that the code for TCP skb receiving hasn't changed much between v3.10 and the latest upstream version, so I guess it can be triggered on the latest version too. If something has been fixed and this WARN() can no longer be triggered, just ignore me. Cheers, Menglong Dong
Re: [PATCH v5 net-next 3/3] net/sched: act_frag: add implict packet fragment support.
On Mon, Nov 09, 2020 at 03:24:37PM +0200, Vlad Buslov wrote: > On Sun 08 Nov 2020 at 01:30, we...@ucloud.cn wrote: ... > > @@ -974,9 +974,22 @@ config NET_ACT_TUNNEL_KEY > > To compile this code as a module, choose M here: the > > module will be called act_tunnel_key. > > > > +config NET_ACT_FRAG > > + tristate "Packet fragmentation" > > + depends on NET_CLS_ACT > > + help > > + Say Y here to allow fragmenting big packets when outputting > > + with the mirred action. > > + > > + If unsure, say N. > > + > > + To compile this code as a module, choose M here: the > > + module will be called act_frag. > > + > > Just wondering, what is the motivation for putting the frag code into > standalone module? It doesn't implement usual act_* interface and is not > user-configurable. To me it looks like functionality that belongs to > act_api. Am I missing something? It's the way we found so far for not "polluting" mirred/tc with L3 functionality, per Cong's feedbacks on previous attempts. As for why not act_api, this is not some code that other actions can just re-use and that file is already quite big, so I thought act_frag would be better to keep it isolated/contained. If act_frag is confusing, then maybe act_mirred_frag? It is a mirred plugin now, after all. ... > > +int tcf_set_xmit_hook(int (*xmit_hook)(struct sk_buff *skb, > > + int (*xmit)(struct sk_buff *skb))) > > +{ > > + if (!tcf_xmit_hook_enabled()) > > + xchg(&tcf_xmit_hook, xmit_hook); > > Marcelo, why did you suggest to use atomic operations to change > tcf_xmit_hook variable? It is not obvious to me after reading the code. I thought as a minimal way to not have problems on module removal, but your comment below proves it is not right/enough. 
:-) > > > + else if (xmit_hook != tcf_xmit_hook) > > + return -EBUSY; > > + > > + tcf_inc_xmit_hook(); > > + > > + return 0; > > +} > > +EXPORT_SYMBOL_GPL(tcf_set_xmit_hook); > > + > > +void tcf_clear_xmit_hook(void) > > +{ > > + tcf_dec_xmit_hook(); > > + > > + if (!tcf_xmit_hook_enabled()) > > + xchg(&tcf_xmit_hook, NULL); > > +} > > +EXPORT_SYMBOL_GPL(tcf_clear_xmit_hook); > > + > > +int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff > > *skb)) > > +{ > > + if (tcf_xmit_hook_enabled()) > > Okay, so what happens here if tcf_xmit_hook is disabled concurrently? If > we get here from some rule that doesn't involve act_ct but uses > act_mirred and act_ct is concurrently removed decrementing last > reference to static branch and setting tcf_xmit_hook to NULL? Yeah.. good point. Thinking further now, what about using RCU for the hook? AFAICT it can cover the synchronization needed when clearing the pointer, tcf_set_xmit_hook() should do a module_get() and tcf_clear_xmit_hook() can delay a module_put(act_frag) as needed with call_rcu. I see tcf_mirred_act is already calling rcu_dereference_bh(), so it's already protected by rcu read here and calling tcf_xmit_hook() with xmit pointer should be fine. WDYT? > > > + return tcf_xmit_hook(skb, xmit); > > + else > > + return xmit(skb); > > +} > > +EXPORT_SYMBOL_GPL(tcf_dev_queue_xmit);
Re: [PATCH v4 5/7] can: update documentation for DLC usage in Classical CAN
On Mon. 9 Nov 2020 at 19:26, Oliver Hartkopp wrote: > > The extension of struct can_frame with the len8_dlc element and the > can_dlc naming issue required an update of the documentation. > > Additionally introduce the term 'Classical CAN' which has been established > by CAN in Automation to separate the original CAN2.0 A/B from CAN FD. > > Updated some data structures and flags. > > Signed-off-by: Oliver Hartkopp > --- > Documentation/networking/can.rst | 68 > 1 file changed, 52 insertions(+), 16 deletions(-) > > diff --git a/Documentation/networking/can.rst > b/Documentation/networking/can.rst > index ff05cbd05e0d..e17c6427bb3a 100644 > --- a/Documentation/networking/can.rst > +++ b/Documentation/networking/can.rst > @@ -226,24 +226,40 @@ interface (which is different from TCP/IP due to > different addressing > the socket, you can read(2) and write(2) from/to the socket or use > send(2), sendto(2), sendmsg(2) and the recv* counterpart operations > on the socket as usual. There are also CAN specific socket options > described below. > > -The basic CAN frame structure and the sockaddr structure are defined > -in include/linux/can.h: > +The Classical CAN frame structure (aka CAN 2.0B), the CAN FD frame structure > +and the sockaddr structure are defined in include/linux/can.h: > > .. code-block:: C > > struct can_frame { > canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */ > -__u8can_dlc; /* frame payload length in byte (0 .. 8) */ > +union { > +/* CAN frame payload length in byte (0 .. CAN_MAX_DLEN) > + * was previously named can_dlc so we need to carry that > + * name for legacy support > + */ > +__u8 len; > +__u8 can_dlc; /* deprecated */ > +}; > __u8__pad; /* padding */ > __u8__res0; /* reserved / padding */ > -__u8__res1; /* reserved / padding */ > +__u8len8_dlc; /* optional DLC for 8 byte payload length (9 > .. 
15) */ > __u8data[8] __attribute__((aligned(8))); > }; > > +Remark: The len element contains the payload length in bytes and should be > +used instead of can_dlc. The deprecated can_dlc was misleadingly named as > +it always contained the plain payload length in bytes and not the so called > +'data length code' (DLC). > + > +To pass the raw DLC from/to a Classical CAN network device the len8_dlc > +element can contain values 9 .. 15 when the len element is 8 (the real > +payload length for all DLC values greater or equal to 8). The "Classical CAN network device" part could make the reader misunderstand that FD capable controllers can not handle Classical CAN frames with DLC greater than 8. All the CAN-FD controllers I am aware of can emit both Classical and FD frames. On the contrary, some Classical CAN controllers might not support sending DLCs greater than 8. Propose to add the nuance that this depends on the device property: +To pass the raw DLC from/to a capable network device +(c.f. cc-len8-dlc CAN device property), the len8_dlc element can +contain values 9 .. 15 when the len element is 8 (the real payload +length for all DLC values greater or equal to 8). > + > The alignment of the (linear) payload data[] to a 64bit boundary > allows the user to define their own structs and unions to easily access > the CAN payload. There is no given byteorder on the CAN bus by > default. A read(2) system call on a CAN_RAW socket transfers a > struct can_frame to the user space. > @@ -258,10 +274,27 @@ PF_PACKET socket, that also binds to a specific > interface: > int can_ifindex; > union { > /* transport protocol class address info (e.g. 
ISOTP) */ > struct { canid_t rx_id, tx_id; } tp; > > +/* J1939 address information */ > +struct { > +/* 8 byte name when using dynamic addressing */ > +__u64 name; > + > +/* pgn: > + * 8 bit: PS in PDU2 case, else 0 > + * 8 bit: PF > + * 1 bit: DP > + * 1 bit: reserved > + */ > +__u32 pgn; > + > +/* 1 byte address */ > +__u8 addr; > +} j1939; > + > /* reserved for future CAN protocols address information > */ > } can_addr; > }; This looks like some J1939 code. Did you mix your patches? > To determine the interface index an appropriate ioctl() has to > @@ -369,11 +402,11 @@ bitrates for the arbitration phase and the payload > phase of the CAN FD frame > and up to 6
Re: [PATCH v5 net-next 3/3] net/sched: act_frag: add implict packet fragment support.
在 2020/11/9 21:24, Vlad Buslov 写道: > On Sun 08 Nov 2020 at 01:30, we...@ucloud.cn wrote: >> From: wenxu >> >> Currently kernel tc subsystem can do conntrack in act_ct. But when several >> fragment packets go through the act_ct, function tcf_ct_handle_fragments >> will defrag the packets to a big one. But the last action will redirect >> mirred to a device which maybe lead the reassembly big packet over the mtu >> of target device. >> >> This patch add support for a xmit hook to mirred, that gets executed before >> xmiting the packet. Then, when act_ct gets loaded, it configs that hook. >> The frag xmit hook maybe reused by other modules. >> >> Signed-off-by: wenxu >> --- >> v2: Fix the crash for act_frag module without load >> v3: modify the kconfig describe and put tcf_xmit_hook_is_enabled >> in the tcf_dev_queue_xmit, and xchg atomic for tcf_xmit_hook >> v4: using skb_protocol and fix line length exceeds 80 columns >> v5: no change >> >> include/net/act_api.h | 16 + >> net/sched/Kconfig | 13 >> net/sched/Makefile | 1 + >> net/sched/act_api.c| 51 +++ >> net/sched/act_ct.c | 7 +++ >> net/sched/act_frag.c | 164 >> + >> net/sched/act_mirred.c | 2 +- >> 7 files changed, 253 insertions(+), 1 deletion(-) >> create mode 100644 net/sched/act_frag.c >> >> diff --git a/include/net/act_api.h b/include/net/act_api.h >> index 8721492..403a618 100644 >> --- a/include/net/act_api.h >> +++ b/include/net/act_api.h >> @@ -239,6 +239,22 @@ int tcf_action_check_ctrlact(int action, struct >> tcf_proto *tp, >> struct netlink_ext_ack *newchain); >> struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action, >> struct tcf_chain *newchain); >> + >> +int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff >> *skb)); >> +int tcf_set_xmit_hook(int (*xmit_hook)(struct sk_buff *skb, >> + int (*xmit)(struct sk_buff *skb))); >> +void tcf_clear_xmit_hook(void); >> + >> +#if IS_ENABLED(CONFIG_NET_ACT_FRAG) >> +int tcf_frag_xmit_hook(struct sk_buff *skb, int 
(*xmit)(struct sk_buff >> *skb)); >> +#else >> +static inline int tcf_frag_xmit_hook(struct sk_buff *skb, >> + int (*xmit)(struct sk_buff *skb)) >> +{ >> +return 0; >> +} >> +#endif >> + >> #endif /* CONFIG_NET_CLS_ACT */ >> >> static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, >> diff --git a/net/sched/Kconfig b/net/sched/Kconfig >> index a3b37d8..9a240c7 100644 >> --- a/net/sched/Kconfig >> +++ b/net/sched/Kconfig >> @@ -974,9 +974,22 @@ config NET_ACT_TUNNEL_KEY >>To compile this code as a module, choose M here: the >>module will be called act_tunnel_key. >> >> +config NET_ACT_FRAG >> +tristate "Packet fragmentation" >> +depends on NET_CLS_ACT >> +help >> + Say Y here to allow fragmenting big packets when outputting >> + with the mirred action. >> + >> + If unsure, say N. >> + >> + To compile this code as a module, choose M here: the >> + module will be called act_frag. >> + > Just wondering, what is the motivation for putting the frag code into > standalone module? It doesn't implement usual act_* interface and is not > user-configurable. To me it looks like functionality that belongs to > act_api. Am I missing something? The fragment operation is an single L3 action. So we put in an single modules. Maybe it is not proper to put in the act_api directly. >> config NET_ACT_CT >> tristate "connection tracking tc action" >> depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE >> +depends on NET_ACT_FRAG >> help >>Say Y here to allow sending the packets to conntrack module. 
>> >> diff --git a/net/sched/Makefile b/net/sched/Makefile >> index 66bbf9a..c146186 100644 >> --- a/net/sched/Makefile >> +++ b/net/sched/Makefile >> @@ -29,6 +29,7 @@ obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o >> obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o >> obj-$(CONFIG_NET_IFE_SKBTCINDEX)+= act_meta_skbtcindex.o >> obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o >> +obj-$(CONFIG_NET_ACT_FRAG) += act_frag.o >> obj-$(CONFIG_NET_ACT_CT)+= act_ct.o >> obj-$(CONFIG_NET_ACT_GATE) += act_gate.o >> obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o >> diff --git a/net/sched/act_api.c b/net/sched/act_api.c >> index f66417d..e7b501c 100644 >> --- a/net/sched/act_api.c >> +++ b/net/sched/act_api.c >> @@ -22,6 +22,57 @@ >> #include >> #include >> >> +static int (*tcf_xmit_hook)(struct sk_buff *skb, >> +int (*xmit)(struct sk_buff *skb)); >> +static DEFINE_STATIC_KEY_FALSE(tcf_xmit_hook_in_use); >> + >> +static void tcf_inc_xmit_hook(void) >> +{ >> +static_branch_inc(&tcf_xmit_hook_in_use); >> +} >> + >> +static void tcf_dec_xmit_hook(void) >> +{ >>
[PATCH v2 2/2] net: fec: Fix reference count leak in fec series ops
pm_runtime_get_sync() will increment pm usage at first and it will resume the device later. If runtime of the device has error or device is in inaccessible state(or other error state), resume operation will fail. If we do not call put operation to decrease the reference, it will result in reference count leak. Moreover, this device cannot enter the idle state and always stay busy or other non-idle state later. So we fixed it by replacing it with pm_runtime_general_get. Fixes: 8fff755e9f8d0 ("net: fec: Ensure clocks are enabled while using mdio bus") Signed-off-by: Zhang Qilong --- drivers/net/ethernet/freescale/fec_main.c | 12 +--- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index d7919555250d..695720f8263f 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1808,7 +1808,7 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum) int ret = 0, frame_start, frame_addr, frame_op; bool is_c45 = !!(regnum & MII_ADDR_C45); - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_general_get(dev); if (ret < 0) return ret; @@ -1867,11 +1867,9 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum, int ret, frame_start, frame_addr; bool is_c45 = !!(regnum & MII_ADDR_C45); - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_general_get(dev); if (ret < 0) return ret; - else - ret = 0; if (is_c45) { frame_start = FEC_MMFR_ST_C45; @@ -2275,7 +2273,7 @@ static void fec_enet_get_regs(struct net_device *ndev, u32 i, off; int ret; - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_general_get(dev); if (ret < 0) return; @@ -2976,7 +2974,7 @@ fec_enet_open(struct net_device *ndev) int ret; bool reset_again; - ret = pm_runtime_get_sync(&fep->pdev->dev); + ret = pm_runtime_general_get(&fep->pdev->dev); if (ret < 0) return ret; @@ -3770,7 +3768,7 @@ fec_drv_remove(struct platform_device 
*pdev) struct device_node *np = pdev->dev.of_node; int ret; - ret = pm_runtime_get_sync(&pdev->dev); + ret = pm_runtime_general_get(&pdev->dev); if (ret < 0) return ret; -- 2.25.4
[PATCH v2 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter
In many case, we need to check return value of pm_runtime_get_sync, but it brings a trouble to the usage counter processing. Many callers forget to decrease the usage counter when it failed. It has been discussed a lot[0][1]. So we add a function to deal with the usage counter for better coding. [0]https://lkml.org/lkml/2020/6/14/88 [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/20200520095148.10995-1-dinghao@zju.edu.cn/ Signed-off-by: Zhang Qilong --- include/linux/pm_runtime.h | 30 ++ 1 file changed, 30 insertions(+) diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 4b708f4e8eed..6549ce764400 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -386,6 +386,36 @@ static inline int pm_runtime_get_sync(struct device *dev) return __pm_runtime_resume(dev, RPM_GET_PUT); } +/** + * pm_runtime_general_get - Bump up usage counter of a device and resume it. + * @dev: Target device. + * + * Increase runtime PM usage counter of @dev first, and carry out runtime-resume + * of it synchronously. If __pm_runtime_resume return negative value(device is in + * error state), we to need decrease the usage counter before it return. If + * __pm_runtime_resume return positive value, it means the runtime of device has + * already been in active state, and we let the new wrapper return zero instead. + * + * The possible return values of this function is zero or negative value. + * zero: + *- it means resume succeeed or runtime of device has already been active, the + * runtime PM usage counter of @dev remains incremented. + * negative: + *- it means failure and the runtime PM usage counter of @dev has been balanced. + */ +static inline int pm_runtime_general_get(struct device *dev) +{ + int ret = 0; + + ret = __pm_runtime_resume(dev, RPM_GET_PUT); + if (ret < 0) { + pm_runtime_put_noidle(dev); + return ret; + } + + return 0; +} + /** * pm_runtime_put - Drop device usage counter and queue up "idle check" if 0. 
* @dev: Target device. -- 2.25.4
[PATCH v2 0/2] Fix usage counter leak by adding a general sync ops
In many cases, we need to check the return value of pm_runtime_get_sync(), but this complicates the handling of the usage counter: many callers forget to decrease the usage counter when it fails. This has been discussed a lot[0][1]. So we add a function that takes care of the usage counter itself, for cleaner and more readable code. Then, we replace pm_runtime_get_sync() with it in fec_main.c. Zhang Qilong (2): PM: runtime: Add a general runtime get sync operation to deal with usage counter net: fec: Fix reference count leak in fec series ops drivers/net/ethernet/freescale/fec_main.c | 12 - include/linux/pm_runtime.h| 30 +++ 2 files changed, 35 insertions(+), 7 deletions(-) -- 2.25.4
Re: [PATCH] IPv6: Set SIT tunnel hard_header_len to zero
On Mon, Nov 9, 2020 at 4:05 AM Oliver Herms wrote: > > > On 04.11.20 20:52, Willem de Bruijn wrote: > Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") > >>> > >>> How did you arrive at this SHA1? > >> I think the legacy usage of hard_header_len in ipv6/sit.c was overseen in > >> c54419321455. > >> Please correct me if I'm wrong. > > > > I don't see anything in that patch assign or modify hard_header_len. > > > It's not assigning or modifying it but changing expectations about how > dev->hard_header_len is to be used. > > The patch changed the MTU calculation from: > mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen; > > to this: > mtu = dst_mtu(&rt->dst) - dev->hard_header_len - sizeof(struct iphdr); > > Later is became this (in patch 23a3647. This is the current implementation.): > mtu = dst_mtu(&rt->dst) - dev->hard_header_len - sizeof(struct iphdr) - > tunnel_hlen; > > Apparently the initial usage of dev->hard_header_len was that it contains the > length > of all headers before the tunnel payload. c54419321455 changed it to assuming > dev->hard_header_len > does not contain the tunnels outter IP header. Thus I think the bug was > introduced by c54419321455. And the only header in the case of SIT is that outer ip header. Got it, thanks. Overly conservative MTU calculation is one issue. Packet sockets also expect read/write link layer access with SOCK_RAW, which does not work correctly for sit. I'm not sure that it ever did. The chosen commit predates all stable trees, which is the most important point. Acked-by: Willem de Bruijn Could ip6 tunnels have the same issue? In ip6_tnl_dev_init_gen, dev->hard_header_len = LL_MAX_HEADER + t_hlen;
[PATCH net-next v4 02/15] net/smc: Use active link of the connection
From: Guvenc Gulce Use active link of the connection directly and not via linkgroup array structure when obtaining link data of the connection. Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul --- net/smc/smc_diag.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index f15fca59b4b2..c2225231f679 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -160,17 +160,17 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, !list_empty(&smc->conn.lgr->list)) { struct smc_diag_lgrinfo linfo = { .role = smc->conn.lgr->role, - .lnk[0].ibport = smc->conn.lgr->lnk[0].ibport, - .lnk[0].link_id = smc->conn.lgr->lnk[0].link_id, + .lnk[0].ibport = smc->conn.lnk->ibport, + .lnk[0].link_id = smc->conn.lnk->link_id, }; memcpy(linfo.lnk[0].ibname, smc->conn.lgr->lnk[0].smcibdev->ibdev->name, - sizeof(smc->conn.lgr->lnk[0].smcibdev->ibdev->name)); + sizeof(smc->conn.lnk->smcibdev->ibdev->name)); smc_gid_be16_convert(linfo.lnk[0].gid, -smc->conn.lgr->lnk[0].gid); +smc->conn.lnk->gid); smc_gid_be16_convert(linfo.lnk[0].peer_gid, -smc->conn.lgr->lnk[0].peer_gid); +smc->conn.lnk->peer_gid); if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0) goto errout; -- 2.17.1
[PATCH net-next v4 04/15] net/smc: Add link counters for IB device ports
From: Guvenc Gulce Add link counters to the structure of the smc ib device, one counter per ib port. Increase/decrease the counters as needed in the corresponding routines. Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul --- net/smc/smc_core.c | 13 + net/smc/smc_ib.h | 2 ++ 2 files changed, 15 insertions(+) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 323a4b396be0..24d55b5b352b 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -63,6 +63,16 @@ static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr, return &smc_lgr_list.list; } +static void smc_ibdev_cnt_inc(struct smc_link *lnk) +{ + atomic_inc(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]); +} + +static void smc_ibdev_cnt_dec(struct smc_link *lnk) +{ + atomic_dec(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]); +} + static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) { /* client link group creation always follows the server link group @@ -316,6 +326,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, lnk->link_idx = link_idx; lnk->smcibdev = ini->ib_dev; lnk->ibport = ini->ib_port; + smc_ibdev_cnt_inc(lnk); lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu; atomic_set(&lnk->conn_cnt, 0); smc_llc_link_set_uid(lnk); @@ -359,6 +370,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, clear_llc_lnk: smc_llc_link_clear(lnk, false); out: + smc_ibdev_cnt_dec(lnk); put_device(&ini->ib_dev->ibdev->dev); memset(lnk, 0, sizeof(struct smc_link)); lnk->state = SMC_LNK_UNUSED; @@ -749,6 +761,7 @@ void smcr_link_clear(struct smc_link *lnk, bool log) smc_ib_destroy_queue_pair(lnk); smc_ib_dealloc_protection_domain(lnk); smc_wr_free_link_mem(lnk); + smc_ibdev_cnt_dec(lnk); put_device(&lnk->smcibdev->ibdev->dev); smcibdev = lnk->smcibdev; memset(lnk, 0, sizeof(struct smc_link)); diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index 2ce481187dd0..3b85360a473b 100644 --- a/net/smc/smc_ib.h +++ 
b/net/smc/smc_ib.h @@ -53,6 +53,8 @@ struct smc_ib_device { /* ib-device infos for smc */ atomic_t lnk_cnt; /* number of links on ibdev */ wait_queue_head_t lnks_deleted; /* wait 4 removal of all links*/ struct mutex mutex; /* protect dev setup+cleanup */ + atomic_t lnk_cnt_by_port[SMC_MAX_PORTS]; + /* number of links per port */ }; struct smc_buf_desc; -- 2.17.1
[PATCH net-next v4 07/15] net/smc: Refactor the netlink reply processing routine
From: Guvenc Gulce Refactor the netlink reply processing routine so that it provides sub functions for specific parts of the processing. Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul --- net/smc/smc_diag.c | 218 +++-- 1 file changed, 133 insertions(+), 85 deletions(-) diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index c2225231f679..44be723c97fe 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -69,35 +69,25 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk) } } -static int smc_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, - struct smc_diag_msg *r, - struct user_namespace *user_ns) +static bool smc_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, + struct smc_diag_msg *r, + struct user_namespace *user_ns) { - if (nla_put_u8(skb, SMC_DIAG_SHUTDOWN, sk->sk_shutdown)) - return 1; + if (nla_put_u8(skb, SMC_DIAG_SHUTDOWN, sk->sk_shutdown) < 0) + return false; r->diag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); r->diag_inode = sock_i_ino(sk); - return 0; + return true; } -static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, - struct netlink_callback *cb, - const struct smc_diag_req *req, - struct nlattr *bc) +static bool smc_diag_fill_base_struct(struct sock *sk, struct sk_buff *skb, + struct netlink_callback *cb, + struct smc_diag_msg *r) { struct smc_sock *smc = smc_sk(sk); - struct smc_diag_fallback fallback; struct user_namespace *user_ns; - struct smc_diag_msg *r; - struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - cb->nlh->nlmsg_type, sizeof(*r), NLM_F_MULTI); - if (!nlh) - return -EMSGSIZE; - - r = nlmsg_data(nlh); smc_diag_msg_common_fill(r, sk); r->diag_state = sk->sk_state; if (smc->use_fallback) @@ -107,89 +97,148 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, else r->diag_mode = SMC_DIAG_MODE_SMCR; user_ns = sk_user_ns(NETLINK_CB(cb->skb).sk); - if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns)) - 
goto errout; + if (!smc_diag_msg_attrs_fill(sk, skb, r, user_ns)) + return false; + return true; +} + +static bool smc_diag_fill_fallback(struct sock *sk, struct sk_buff *skb) +{ + struct smc_diag_fallback fallback; + struct smc_sock *smc = smc_sk(sk); + + memset(&fallback, 0, sizeof(fallback)); fallback.reason = smc->fallback_rsn; fallback.peer_diagnosis = smc->peer_diagnosis; if (nla_put(skb, SMC_DIAG_FALLBACK, sizeof(fallback), &fallback) < 0) + return false; + + return true; +} + +static bool smc_diag_fill_conninfo(struct sock *sk, struct sk_buff *skb) +{ + struct smc_host_cdc_msg *local_tx, *local_rx; + struct smc_diag_conninfo cinfo; + struct smc_connection *conn; + struct smc_sock *smc; + + smc = smc_sk(sk); + conn = &smc->conn; + local_tx = &conn->local_tx_ctrl; + local_rx = &conn->local_rx_ctrl; + memset(&cinfo, 0, sizeof(cinfo)); + cinfo.token = conn->alert_token_local; + cinfo.sndbuf_size = conn->sndbuf_desc ? conn->sndbuf_desc->len : 0; + cinfo.rmbe_size = conn->rmb_desc ? conn->rmb_desc->len : 0; + cinfo.peer_rmbe_size = conn->peer_rmbe_size; + + cinfo.rx_prod.wrap = local_rx->prod.wrap; + cinfo.rx_prod.count = local_rx->prod.count; + cinfo.rx_cons.wrap = local_rx->cons.wrap; + cinfo.rx_cons.count = local_rx->cons.count; + + cinfo.tx_prod.wrap = local_tx->prod.wrap; + cinfo.tx_prod.count = local_tx->prod.count; + cinfo.tx_cons.wrap = local_tx->cons.wrap; + cinfo.tx_cons.count = local_tx->cons.count; + + cinfo.tx_prod_flags = *(u8 *)&local_tx->prod_flags; + cinfo.tx_conn_state_flags = *(u8 *)&local_tx->conn_state_flags; + cinfo.rx_prod_flags = *(u8 *)&local_rx->prod_flags; + cinfo.rx_conn_state_flags = *(u8 *)&local_rx->conn_state_flags; + + cinfo.tx_prep.wrap = conn->tx_curs_prep.wrap; + cinfo.tx_prep.count = conn->tx_curs_prep.count; + cinfo.tx_sent.wrap = conn->tx_curs_sent.wrap; + cinfo.tx_sent.count = conn->tx_curs_sent.count; + cinfo.tx_fin.wrap = conn->tx_curs_fin.wrap; + cinfo.tx_fin.count = conn->tx_curs_fin.count; + + if (nla_put(skb, 
SMC_DIAG_CONNINFO, sizeof(cinfo), &cinfo) < 0) + return false; + + return true; +} + +static bool smc_diag_fill_lgrinfo(struct
[PATCH net-next v4 01/15] net/smc: use helper smc_conn_abort() in listen processing
The helper smc_connect_abort() can be used by the listen processing functions, too. And rename this helper to smc_conn_abort() to make the purpose clearer. No functional change. Signed-off-by: Karsten Graul --- net/smc/af_smc.c | 17 + 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 527185af7bf3..bc3e45289771 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -552,8 +552,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code, return smc_connect_fallback(smc, reason_code); } -/* abort connecting */ -static void smc_connect_abort(struct smc_sock *smc, int local_first) +static void smc_conn_abort(struct smc_sock *smc, int local_first) { if (local_first) smc_lgr_cleanup_early(&smc->conn); @@ -814,7 +813,7 @@ static int smc_connect_rdma(struct smc_sock *smc, return 0; connect_abort: - smc_connect_abort(smc, ini->first_contact_local); + smc_conn_abort(smc, ini->first_contact_local); mutex_unlock(&smc_client_lgr_pending); smc->connect_nonblock = 0; @@ -893,7 +892,7 @@ static int smc_connect_ism(struct smc_sock *smc, return 0; connect_abort: - smc_connect_abort(smc, ini->first_contact_local); + smc_conn_abort(smc, ini->first_contact_local); mutex_unlock(&smc_server_lgr_pending); smc->connect_nonblock = 0; @@ -1320,10 +1319,7 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, int local_first, u8 version) { /* RDMA setup failed, switch back to TCP */ - if (local_first) - smc_lgr_cleanup_early(&new_smc->conn); - else - smc_conn_free(&new_smc->conn); + smc_conn_abort(new_smc, local_first); if (reason_code < 0) { /* error, no fallback possible */ smc_listen_out_err(new_smc); return; @@ -1429,10 +1425,7 @@ static int smc_listen_ism_init(struct smc_sock *new_smc, /* Create send and receive buffers */ rc = smc_buf_create(new_smc, true); if (rc) { - if (ini->first_contact_local) - smc_lgr_cleanup_early(&new_smc->conn); - else - smc_conn_free(&new_smc->conn); + 
smc_conn_abort(new_smc, ini->first_contact_local); return (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB : SMC_CLC_DECL_MEM; } -- 2.17.1
[PATCH net-next v4 10/15] net/smc: Introduce SMCR get link command
From: Guvenc Gulce Introduce get link command which loops through all available links of all available link groups. It uses the SMC-R linkgroup list as entry point, not the socket list, which makes linkgroup diagnosis possible, in case linkgroup does not contain active connections anymore. Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul --- include/uapi/linux/smc_diag.h | 8 + net/smc/smc_diag.c| 62 ++- 2 files changed, 69 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index 6ae028344b6d..a57df0296aa4 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -79,6 +80,7 @@ enum { /* SMC_DIAG_GET_LGR_INFO command extensions */ enum { SMC_DIAG_LGR_INFO_SMCR = 1, + SMC_DIAG_LGR_INFO_SMCR_LINK, }; #define SMC_DIAG_MAX (__SMC_DIAG_MAX - 1) @@ -129,6 +131,12 @@ struct smc_diag_linkinfo { __u8 ibport;/* RDMA device port number */ __u8 gid[40]; /* local GID */ __u8 peer_gid[40]; /* peer GID */ + /* Fields above used by legacy v1 code */ + __u32 conn_cnt; + __u8 netdev[IFNAMSIZ]; /* ethernet device name */ + __u8 link_uid[4]; /* unique link id */ + __u8 peer_link_uid[4]; /* unique peer link id */ + __u32 link_state; /* link state */ }; struct smc_diag_lgrinfo { diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index c958b23843e6..9a41548d6263 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -20,6 +20,7 @@ #include #include "smc.h" +#include "smc_ib.h" #include "smc_core.h" static const struct smc_diag_ops *smc_diag_ops; @@ -205,6 +206,54 @@ static bool smc_diag_fill_dmbinfo(struct sock *sk, struct sk_buff *skb) return true; } +static int smc_diag_fill_lgr_link(struct smc_link_group *lgr, + struct smc_link *link, + struct sk_buff *skb, + struct netlink_callback *cb, + struct smc_diag_req_v2 *req) +{ + struct smc_diag_linkinfo link_info; + int dummy = 0, rc = 0; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, 
NETLINK_CB(cb->skb).portid, MAGIC_SEQ_V2_ACK, + cb->nlh->nlmsg_type, 0, NLM_F_MULTI); + + memset(&link_info, 0, sizeof(link_info)); + link_info.link_state = link->state; + link_info.link_id = link->link_id; + link_info.conn_cnt = atomic_read(&link->conn_cnt); + link_info.ibport = link->ibport; + + memcpy(link_info.link_uid, link->link_uid, + sizeof(link_info.link_uid)); + snprintf(link_info.ibname, sizeof(link_info.ibname), "%s", + link->ibname); + snprintf(link_info.netdev, sizeof(link_info.netdev), "%s", + link->ndevname); + memcpy(link_info.peer_link_uid, link->peer_link_uid, + sizeof(link_info.peer_link_uid)); + + smc_gid_be16_convert(link_info.gid, + link->gid); + smc_gid_be16_convert(link_info.peer_gid, + link->peer_gid); + + /* Just a command place holder to signal back the command reply type */ + if (nla_put(skb, SMC_DIAG_GET_LGR_INFO, sizeof(dummy), &dummy) < 0) + goto errout; + if (nla_put(skb, SMC_DIAG_LGR_INFO_SMCR_LINK, + sizeof(link_info), &link_info) < 0) + goto errout; + + nlmsg_end(skb, nlh); + return rc; + +errout: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + static int smc_diag_fill_lgr(struct smc_link_group *lgr, struct sk_buff *skb, struct netlink_callback *cb, @@ -240,7 +289,7 @@ static int smc_diag_handle_lgr(struct smc_link_group *lgr, struct smc_diag_req_v2 *req) { struct nlmsghdr *nlh; - int rc = 0; + int i, rc = 0; nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, MAGIC_SEQ_V2_ACK, cb->nlh->nlmsg_type, 0, NLM_F_MULTI); @@ -252,6 +301,17 @@ static int smc_diag_handle_lgr(struct smc_link_group *lgr, goto errout; nlmsg_end(skb, nlh); + + if ((req->cmd_ext & (1 << (SMC_DIAG_LGR_INFO_SMCR_LINK - 1)))) { + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (!smc_link_usable(&lgr->lnk[i])) + continue; + rc = smc_diag_fill_lgr_link(lgr, &lgr->lnk[i], skb, + cb, req); + if (rc < 0) + goto errout; + } + } return rc; errout: -- 2.17.1
[PATCH net-next v4 11/15] net/smc: Add SMC-D Linkgroup diagnostic support
From: Guvenc Gulce Deliver SMCD Linkgroup information via netlink based diagnostic interface. Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul --- include/uapi/linux/smc_diag.h | 7 +++ net/smc/smc_core.c| 7 +++ net/smc/smc_core.h| 2 + net/smc/smc_diag.c| 108 ++ 4 files changed, 124 insertions(+) diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index a57df0296aa4..5a80172df757 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -81,6 +81,7 @@ enum { enum { SMC_DIAG_LGR_INFO_SMCR = 1, SMC_DIAG_LGR_INFO_SMCR_LINK, + SMC_DIAG_LGR_INFO_SMCD, }; #define SMC_DIAG_MAX (__SMC_DIAG_MAX - 1) @@ -155,6 +156,12 @@ struct smcd_diag_dmbinfo { /* SMC-D Socket internals */ __aligned_u64 my_gid; /* My GID */ __aligned_u64 token; /* Token of DMB */ __aligned_u64 peer_token; /* Token of remote DMBE */ + /* Fields above used by legacy v1 code */ + __u8pnet_id[SMC_MAX_PNETID_LEN]; /* Pnet ID */ + __u32 conns_num; /* Number of connections */ + __u16 chid; /* Linkgroup CHID */ + __u8vlan_id;/* Linkgroup vlan id */ + struct smc_diag_v2_lgr_info v2_lgr_info; /* SMCv2 info */ }; struct smc_diag_lgr { diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 2dcb1208f55e..37cc754485f0 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -214,6 +214,11 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) conn->lgr = NULL; } +static struct smcd_dev_list *smc_get_smcd_dev_list(void) +{ + return &smcd_dev_list; +} + static struct smc_lgr_list *smc_get_lgr_list(void) { return &smc_lgr_list; @@ -221,6 +226,8 @@ static struct smc_lgr_list *smc_get_lgr_list(void) static const struct smc_diag_ops smc_diag_ops = { .get_lgr_list = smc_get_lgr_list, + .get_smcd_devices = smc_get_smcd_dev_list, + .get_chid = smc_ism_get_chid, }; const struct smc_diag_ops *smc_get_diag_ops(void) diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 09ff2011dd78..fb1f63f5e681 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h 
@@ -23,6 +23,8 @@ /* Functions which are needed for diagnostic purposes by smc_diag module */ struct smc_diag_ops { struct smc_lgr_list *(*get_lgr_list)(void); + struct smcd_dev_list *(*get_smcd_devices)(void); + u16 (*get_chid)(struct smcd_dev *smcd); }; struct smc_lgr_list { /* list of link group definition */ diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 9a41548d6263..a644e2299dbc 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -21,6 +21,7 @@ #include "smc.h" #include "smc_ib.h" +#include "smc_ism.h" #include "smc_core.h" static const struct smc_diag_ops *smc_diag_ops; @@ -254,6 +255,53 @@ static int smc_diag_fill_lgr_link(struct smc_link_group *lgr, return -EMSGSIZE; } +static int smc_diag_fill_smcd_lgr(struct smc_link_group *lgr, + struct sk_buff *skb, + struct netlink_callback *cb, + struct smc_diag_req_v2 *req) +{ + struct smcd_diag_dmbinfo smcd_lgr; + struct nlmsghdr *nlh; + int dummy = 0; + int rc = 0; + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, MAGIC_SEQ_V2_ACK, + cb->nlh->nlmsg_type, 0, NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + + memset(&smcd_lgr, 0, sizeof(smcd_lgr)); + memcpy(&smcd_lgr.linkid, lgr->id, sizeof(lgr->id)); + smcd_lgr.conns_num = lgr->conns_num; + smcd_lgr.vlan_id = lgr->vlan_id; + smcd_lgr.peer_gid = lgr->peer_gid; + smcd_lgr.my_gid = lgr->smcd->local_gid; + smcd_lgr.chid = smc_diag_ops->get_chid(lgr->smcd); + memcpy(&smcd_lgr.v2_lgr_info.negotiated_eid, lgr->negotiated_eid, + sizeof(smcd_lgr.v2_lgr_info.negotiated_eid)); + memcpy(&smcd_lgr.v2_lgr_info.peer_hostname, lgr->peer_hostname, + sizeof(smcd_lgr.v2_lgr_info.peer_hostname)); + smcd_lgr.v2_lgr_info.peer_os = lgr->peer_os; + smcd_lgr.v2_lgr_info.peer_smc_release = lgr->peer_smc_release; + smcd_lgr.v2_lgr_info.smc_version = lgr->smc_version; + snprintf(smcd_lgr.pnet_id, sizeof(smcd_lgr.pnet_id), "%s", +lgr->smcd->pnetid); + + /* Just a command place holder to signal back the command reply type */ + if (nla_put(skb, SMC_DIAG_GET_LGR_INFO, 
sizeof(dummy), &dummy) < 0) + goto errout; + + if (nla_put(skb, SMC_DIAG_LGR_INFO_SMCD, + sizeof(smcd_lgr), &smcd_lgr) < 0) + goto errout; + + nlmsg_end(skb, nlh); + return rc; +errout: + nlmsg_cancel(skb, nlh);
[PATCH net-next v4 09/15] net/smc: Introduce SMCR get linkgroup command
From: Guvenc Gulce Introduce get linkgroup command which loops through all available SMCR linkgroups. It uses the SMC-R linkgroup list as entry point, not the socket list, which makes linkgroup diagnosis possible, in case linkgroup does not contain active connections anymore. Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul --- include/net/smc.h | 2 +- include/uapi/linux/smc.h | 5 ++ include/uapi/linux/smc_diag.h | 43 + net/smc/smc.h | 4 +- net/smc/smc_core.c| 15 ++ net/smc/smc_core.h| 7 ++- net/smc/smc_diag.c| 91 +++ 7 files changed, 162 insertions(+), 5 deletions(-) diff --git a/include/net/smc.h b/include/net/smc.h index e441aa97ad61..59d25dcb8e92 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -10,8 +10,8 @@ */ #ifndef _SMC_H #define _SMC_H +#include -#define SMC_MAX_PNETID_LEN 16 /* Max. length of PNET id */ struct smc_hashinfo { rwlock_t lock; diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index 0e11ca421ca4..635e2c2aeac5 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -3,6 +3,7 @@ * Shared Memory Communications over RDMA (SMC-R) and RoCE * * Definitions for generic netlink based configuration of an SMC-R PNET table + * Definitions for SMC Linkgroup and Devices. * * Copyright IBM Corp. 2016 * @@ -33,4 +34,8 @@ enum {/* SMC PNET Table commands */ #define SMCR_GENL_FAMILY_NAME "SMC_PNETID" #define SMCR_GENL_FAMILY_VERSION 1 +#define SMC_MAX_PNETID_LEN 16 /* Max. 
length of PNET id */ +#define SMC_LGR_ID_SIZE4 +#define SMC_MAX_HOSTNAME_LEN 32 /* Max length of hostname */ +#define SMC_MAX_EID_LEN32 /* Max length of eid */ #endif /* _UAPI_LINUX_SMC_H */ diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index 236c1c52d562..6ae028344b6d 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -4,8 +4,10 @@ #include #include +#include #include +#define SMC_DIAG_EXTS_PER_CMD 16 /* Sequence numbers */ enum { MAGIC_SEQ = 123456, @@ -21,6 +23,17 @@ struct smc_diag_req { struct inet_diag_sockid id; }; +/* Request structure v2 */ +struct smc_diag_req_v2 { + __u8diag_family; + __u8pad[2]; + __u8diag_ext; /* Query extended information */ + struct inet_diag_sockid id; + __u32 cmd; + __u32 cmd_ext; + __u8cmd_val[8]; +}; + /* Base info structure. It contains socket identity (addrs/ports/cookie) based * on the internal clcsock, and more SMC-related socket data */ @@ -57,7 +70,19 @@ enum { __SMC_DIAG_MAX, }; +/* V2 Commands */ +enum { + SMC_DIAG_GET_LGR_INFO = SMC_DIAG_EXTS_PER_CMD, + __SMC_DIAG_EXT_MAX, +}; + +/* SMC_DIAG_GET_LGR_INFO command extensions */ +enum { + SMC_DIAG_LGR_INFO_SMCR = 1, +}; + #define SMC_DIAG_MAX (__SMC_DIAG_MAX - 1) +#define SMC_DIAG_EXT_MAX (__SMC_DIAG_EXT_MAX - 1) /* SMC_DIAG_CONNINFO */ @@ -88,6 +113,14 @@ struct smc_diag_conninfo { struct smc_diag_cursor tx_fin; /* confirmed sent cursor */ }; +struct smc_diag_v2_lgr_info { + __u8smc_version;/* SMC Version */ + __u8peer_smc_release; /* Peer SMC Version */ + __u8peer_os;/* Peer operating system */ + __u8negotiated_eid[SMC_MAX_EID_LEN]; /* Negotiated EID */ + __u8peer_hostname[SMC_MAX_HOSTNAME_LEN]; /* Peer host */ +}; + /* SMC_DIAG_LINKINFO */ struct smc_diag_linkinfo { @@ -116,4 +149,14 @@ struct smcd_diag_dmbinfo { /* SMC-D Socket internals */ __aligned_u64 peer_token; /* Token of remote DMBE */ }; +struct smc_diag_lgr { + __u8lgr_id[SMC_LGR_ID_SIZE]; /* Linkgroup identifier */ + __u8lgr_role; /* Linkgroup role */ 
+ __u8lgr_type; /* Linkgroup type */ + __u8pnet_id[SMC_MAX_PNETID_LEN]; /* Linkgroup pnet id */ + __u8vlan_id;/* Linkgroup vland id */ + __u32 conns_num; /* Number of connections */ + __u8reserved; /* Reserved for future use */ + struct smc_diag_v2_lgr_info v2_lgr_info; /* SMCv2 info */ +}; #endif /* _UAPI_SMC_DIAG_H_ */ diff --git a/net/smc/smc.h b/net/smc/smc.h index d65e15f0c944..d3bf81759285 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -14,6 +14,7 @@ #include #include #include /* __aligned */ +#include #include #include "smc_ib.h" @@ -29,9 +30,6 @@ * devices */ -#define SMC_MAX_HOSTNAME_LEN 32 -#define SMC_
RE: [EXTERNAL] Re: [PATCH net 2/4] net:phy:smsc: expand documentation of clocks property
> - Eaton Industries Manufacturing GmbH ~ Registered place of business: Route de la Longeraie 7, 1110, Morges, Switzerland - -Original Message- > From: Florian Fainelli > Sent: Wednesday, November 04, 2020 5:02 PM > To: Badel, Laurent ; Rob Herring > > Cc: da...@davemloft.net; fugang.d...@nxp.com; and...@lunn.ch; > lgirdw...@gmail.com; m.fel...@pengutronix.de; robh...@kernel.org; > k...@kernel.org; li...@armlinux.org.uk; richard.leit...@skidata.com; > netdev@vger.kernel.org; Quette, Arnaud ; > p.za...@pengutronix.de; devicet...@vger.kernel.org; broo...@kernel.org; > Heiner Kallweit > Subject: Re: [EXTERNAL] Re: [PATCH net 2/4] net:phy:smsc: expand > documentation of clocks property > > > > On 11/4/2020 4:11 AM, Badel, Laurent wrote: > > > > > > > - > > Eaton Industries Manufacturing GmbH ~ Registered place of business: > > Route de la Longeraie 7, 1110, Morges, Switzerland > > > > - > > > > -Original Message- > >> From: Rob Herring > >> Sent: Friday, October 30, 2020 8:19 PM > >> To: Badel, Laurent > >> Cc: da...@davemloft.net; fugang.d...@nxp.com; and...@lunn.ch; > >> lgirdw...@gmail.com; m.fel...@pengutronix.de; robh...@kernel.org; > >> k...@kernel.org; li...@armlinux.org.uk; richard.leit...@skidata.com; > >> netdev@vger.kernel.org; Quette, Arnaud ; > >> p.za...@pengutronix.de; devicet...@vger.kernel.org; > >> f.faine...@gmail.com; broo...@kernel.org; Heiner Kallweit > >> > >> Subject: [EXTERNAL] Re: [PATCH net 2/4] net:phy:smsc: expand > >> documentation of clocks property > >> > >> On Tue, 27 Oct 2020 23:27:42 +, Badel, Laurent wrote: > >>> Subject: [PATCH net 2/4] net:phy:smsc: expand documentation of > >>> clocks property > >>> > >>> Description: The ref clock is managed differently when added to the > >>> DT entry for SMSC PHY. Thus, specify this more clearly in the > documentation. 
> >>> > >>> Signed-off-by: Laurent Badel > >>> --- > >>> Documentation/devicetree/bindings/net/smsc-lan87xx.txt | 3 ++- > >>> 1 file changed, 2 insertions(+), 1 deletion(-) > >>> > >> > >> Acked-by: Rob Herring > > > > Thank you very much. > > I'm guessing perhaps I should re-send this as a single patch since > > there are issues with the patch series? > > I realize now that I should have splitted things differently. > > There are several things with your patch series that make it very hard to be > followed or to even know what is the latest version of your patch series. If > you can resubmit everything targeting the 'net' tree along with a cover letter > explaining the differences between v1 and v2 that would help. Please make > sure that all of your patches reference the cover letter's Message-Id which is > the default if you use git format-patch --cover-letter . > > Thanks > -- > Florian I will make sure to give details as you suggested, sorry for the trouble and thank you for your time reviewing. Laurent
[PATCH net-next v4 00/15] net/smc: extend diagnostic netlink interface
Please apply the following patch series for smc to netdev's net-next tree. This patch series refactors the current netlink API in smc_diag module which is used for diagnostic purposes and extends the netlink API in a backward compatible way so that the extended API can provide information about SMC linkgroups, links and devices (both for SMC-R and SMC-D) and can still work with the legacy netlink API. Please note that patch 9 triggers a checkpatch warning because a comment line was added using the style of the already existing comment block. v2: In patch 10, add missing include to uapi header smc_diag.h. v3: Apply code style recommendations from review comments. Instead of using EXPORTs to allow the smc_diag module to access data of the smc module, introduce struct smc_diag_ops and let smc_diag access the required data using function pointers. v4: Address checkpatch.pl warnings. Do not use static inline for functions. Guvenc Gulce (14): net/smc: Use active link of the connection net/smc: Add connection counters for links net/smc: Add link counters for IB device ports net/smc: Add diagnostic information to smc ib-device net/smc: Add diagnostic information to link structure net/smc: Refactor the netlink reply processing routine net/smc: Add ability to work with extended SMC netlink API net/smc: Introduce SMCR get linkgroup command net/smc: Introduce SMCR get link command net/smc: Add SMC-D Linkgroup diagnostic support net/smc: Add support for obtaining SMCD device list net/smc: Add support for obtaining SMCR device list net/smc: Refactor smc ism v2 capability handling net/smc: Add support for obtaining system information Karsten Graul (1): net/smc: use helper smc_conn_abort() in listen processing include/net/smc.h | 2 +- include/uapi/linux/smc.h | 8 + include/uapi/linux/smc_diag.h | 108 + net/smc/af_smc.c | 29 +- net/smc/smc.h | 4 +- net/smc/smc_clc.c | 5 + net/smc/smc_clc.h | 1 + net/smc/smc_core.c| 72 +++- net/smc/smc_core.h| 26 +- net/smc/smc_diag.c| 788 ++ 
net/smc/smc_ib.c | 45 ++ net/smc/smc_ib.h | 5 +- net/smc/smc_ism.c | 8 +- net/smc/smc_ism.h | 5 +- net/smc/smc_pnet.c| 3 + 15 files changed, 986 insertions(+), 123 deletions(-) -- 2.17.1
[PATCH net-next v4 14/15] net/smc: Refactor smc ism v2 capability handling
From: Guvenc Gulce Encapsulate the smc ism v2 capability boolean value in a function for better information hiding. Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul --- net/smc/af_smc.c | 12 ++-- net/smc/smc_ism.c | 8 +++- net/smc/smc_ism.h | 5 ++--- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index bc3e45289771..850e6df47a59 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -668,7 +668,7 @@ static int smc_find_proposal_devices(struct smc_sock *smc, ini->smc_type_v1 = SMC_TYPE_N; } /* else RDMA is supported for this connection */ } - if (smc_ism_v2_capable && smc_find_ism_v2_device_clnt(smc, ini)) + if (smc_ism_is_v2_capable() && smc_find_ism_v2_device_clnt(smc, ini)) ini->smc_type_v2 = SMC_TYPE_N; /* if neither ISM nor RDMA are supported, fallback */ @@ -920,7 +920,7 @@ static int smc_connect_check_aclc(struct smc_init_info *ini, /* perform steps before actually connecting */ static int __smc_connect(struct smc_sock *smc) { - u8 version = smc_ism_v2_capable ? SMC_V2 : SMC_V1; + u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1; struct smc_clc_msg_accept_confirm_v2 *aclc2; struct smc_clc_msg_accept_confirm *aclc; struct smc_init_info *ini = NULL; @@ -945,9 +945,9 @@ static int __smc_connect(struct smc_sock *smc) version); ini->smcd_version = SMC_V1; - ini->smcd_version |= smc_ism_v2_capable ? SMC_V2 : 0; + ini->smcd_version |= smc_ism_is_v2_capable() ? SMC_V2 : 0; ini->smc_type_v1 = SMC_TYPE_B; - ini->smc_type_v2 = smc_ism_v2_capable ? SMC_TYPE_D : SMC_TYPE_N; + ini->smc_type_v2 = smc_ism_is_v2_capable() ? 
SMC_TYPE_D : SMC_TYPE_N; /* get vlan id from IP device */ if (smc_vlan_by_tcpsk(smc->clcsock, ini)) { @@ -1354,7 +1354,7 @@ static int smc_listen_v2_check(struct smc_sock *new_smc, rc = SMC_CLC_DECL_PEERNOSMC; goto out; } - if (!smc_ism_v2_capable) { + if (!smc_ism_is_v2_capable()) { ini->smcd_version &= ~SMC_V2; rc = SMC_CLC_DECL_NOISM2SUPP; goto out; @@ -1680,7 +1680,7 @@ static void smc_listen_work(struct work_struct *work) { struct smc_sock *new_smc = container_of(work, struct smc_sock, smc_listen_work); - u8 version = smc_ism_v2_capable ? SMC_V2 : SMC_V1; + u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1; struct socket *newclcsock = new_smc->clcsock; struct smc_clc_msg_accept_confirm *cclc; struct smc_clc_msg_proposal_area *buf; diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 6abbdd09a580..2456ee8228cd 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -21,7 +21,7 @@ struct smcd_dev_list smcd_dev_list = { .mutex = __MUTEX_INITIALIZER(smcd_dev_list.mutex) }; -bool smc_ism_v2_capable; +static bool smc_ism_v2_capable; /* Test if an ISM communication is possible - same CPC */ int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd) @@ -51,6 +51,12 @@ u16 smc_ism_get_chid(struct smcd_dev *smcd) return smcd->ops->get_chid(smcd); } +/* HW supports ISM V2 and thus System EID is defined */ +bool smc_ism_is_v2_capable(void) +{ + return smc_ism_v2_capable; +} + /* Set a connection using this DMBE. 
*/ void smc_ism_set_conn(struct smc_connection *conn) { diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h index 8048e09ddcf8..481a4b7df30b 100644 --- a/net/smc/smc_ism.h +++ b/net/smc/smc_ism.h @@ -10,6 +10,7 @@ #define SMCD_ISM_H #include +#include #include #include "smc.h" @@ -20,9 +21,6 @@ struct smcd_dev_list { /* List of SMCD devices */ }; extern struct smcd_dev_list smcd_dev_list; /* list of smcd devices */ -extern bool smc_ism_v2_capable; /* HW supports ISM V2 and thus - * System EID is defined - */ struct smc_ism_vlanid { /* VLAN id set on ISM device */ struct list_head list; @@ -52,5 +50,6 @@ int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos, int smc_ism_signal_shutdown(struct smc_link_group *lgr); void smc_ism_get_system_eid(struct smcd_dev *dev, u8 **eid); u16 smc_ism_get_chid(struct smcd_dev *dev); +bool smc_ism_is_v2_capable(void); void smc_ism_init(void); #endif -- 2.17.1
[PATCH net-next v4 13/15] net/smc: Add support for obtaining SMCR device list
From: Guvenc Gulce Deliver SMCR device information via netlink based diagnostic interface. Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul --- include/uapi/linux/smc_diag.h | 6 ++ net/smc/smc_core.c| 7 ++ net/smc/smc_core.h| 2 + net/smc/smc_diag.c| 133 ++ 4 files changed, 148 insertions(+) diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index ab8f76bdd1a4..4c6332785533 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -88,6 +88,7 @@ enum { /* SMC_DIAG_GET_DEV_INFO command extensions */ enum { SMC_DIAG_DEV_INFO_SMCD = 1, + SMC_DIAG_DEV_INFO_SMCR, }; #define SMC_DIAG_MAX (__SMC_DIAG_MAX - 1) @@ -182,6 +183,11 @@ struct smc_diag_dev_info { __u16 pci_vendor; /* PCI Vendor */ __u16 pci_device; /* PCI Device Vendor ID */ __u8pci_id[SMC_PCI_ID_STR_LEN]; /* PCI ID */ + __u8dev_name[IB_DEVICE_NAME_MAX]; /* IB Device name */ + __u8netdev[SMC_MAX_PORTS][IFNAMSIZ]; /* Netdev name(s) */ + __u8port_state[SMC_MAX_PORTS]; /* IB Port State */ + __u8port_valid[SMC_MAX_PORTS]; /* Is IB Port valid */ + __u32 lnk_cnt_by_port[SMC_MAX_PORTS]; /* # lnks per port */ }; struct smc_diag_lgr { diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 37cc754485f0..f23f8f1d10d8 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -214,6 +214,11 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) conn->lgr = NULL; } +static struct smc_ib_devices *smc_get_smc_ib_devices(void) +{ + return &smc_ib_devices; +} + static struct smcd_dev_list *smc_get_smcd_dev_list(void) { return &smcd_dev_list; @@ -228,6 +233,8 @@ static const struct smc_diag_ops smc_diag_ops = { .get_lgr_list = smc_get_lgr_list, .get_smcd_devices = smc_get_smcd_dev_list, .get_chid = smc_ism_get_chid, + .get_ib_devices = smc_get_smc_ib_devices, + .is_ib_port_active = smc_ib_port_active, }; const struct smc_diag_ops *smc_get_diag_ops(void) diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index eec19a8e394c..6bf89bfe34bd 100644 --- 
a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -25,6 +25,8 @@ struct smc_diag_ops { struct smc_lgr_list *(*get_lgr_list)(void); struct smcd_dev_list *(*get_smcd_devices)(void); u16 (*get_chid)(struct smcd_dev *smcd); + struct smc_ib_devices *(*get_ib_devices)(void); + bool (*is_ib_port_active)(struct smc_ib_device *smcibdev, u8 ibport); }; struct smc_lgr_list { /* list of link group definition */ diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 6e7798dc57fb..3d5151919326 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -386,6 +386,33 @@ static int smc_diag_handle_lgr(struct smc_link_group *lgr, return rc; } +static bool smcr_diag_is_dev_critical(struct smc_lgr_list *smc_lgr, + struct smc_ib_device *smcibdev) +{ + struct smc_link_group *lgr; + bool rc = false; + int i; + + spin_lock_bh(&smc_lgr->lock); + list_for_each_entry(lgr, &smc_lgr->list, list) { + if (lgr->is_smcd) + continue; + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (lgr->lnk[i].state == SMC_LNK_UNUSED || + lgr->lnk[i].smcibdev != smcibdev) + continue; + if (lgr->type == SMC_LGR_SINGLE || + lgr->type == SMC_LGR_ASYMMETRIC_LOCAL) { + rc = true; + goto out; + } + } + } +out: + spin_unlock_bh(&smc_lgr->lock); + return rc; +} + static int smc_diag_fill_lgr_list(struct smc_lgr_list *smc_lgr, struct sk_buff *skb, struct netlink_callback *cb, @@ -541,6 +568,109 @@ static int smc_diag_prep_smcd_dev(struct smcd_dev_list *dev_list, return rc; } +static void smc_diag_handle_dev_port(struct smc_diag_dev_info *smc_diag_dev, +struct ib_device *ibdev, +struct smc_ib_device *smcibdev, +int port) +{ + unsigned char port_state; + + smc_diag_dev->port_valid[port] = 1; + snprintf((char *)&smc_diag_dev->netdev[port], +sizeof(smc_diag_dev->netdev[port]), +"%s", (char *)&smcibdev->netdev[port]); + snprintf((char *)&smc_diag_dev->pnet_id[port], +sizeof(smc_diag_dev->pnet_id[port]), "%s", +(char *)&smcibdev->pnetid[port]
[PATCH net-next v4 15/15] net/smc: Add support for obtaining system information
From: Guvenc Gulce Add new netlink command to obtain system information of the smc module. Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul --- include/uapi/linux/smc.h | 1 + include/uapi/linux/smc_diag.h | 17 ++ net/smc/smc_clc.c | 5 +++ net/smc/smc_clc.h | 1 + net/smc/smc_core.c| 3 ++ net/smc/smc_core.h| 3 ++ net/smc/smc_diag.c| 62 +++ 7 files changed, 92 insertions(+) diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index 736e8b98c8a5..04385a98037a 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -38,6 +38,7 @@ enum {/* SMC PNET Table commands */ #define SMC_LGR_ID_SIZE4 #define SMC_MAX_HOSTNAME_LEN 32 /* Max length of hostname */ #define SMC_MAX_EID_LEN32 /* Max length of eid */ +#define SMC_MAX_EID8 /* Max number of eids */ #define SMC_MAX_PORTS 2 /* Max # of ports per ib device */ #define SMC_PCI_ID_STR_LEN 16 /* Max length of pci id string */ #endif /* _UAPI_LINUX_SMC_H */ diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index 4c6332785533..d63b08c0b7e8 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -75,6 +75,7 @@ enum { enum { SMC_DIAG_GET_LGR_INFO = SMC_DIAG_EXTS_PER_CMD, SMC_DIAG_GET_DEV_INFO, + SMC_DIAG_GET_SYS_INFO, __SMC_DIAG_EXT_MAX, }; @@ -91,6 +92,11 @@ enum { SMC_DIAG_DEV_INFO_SMCR, }; +/* SMC_DIAG_GET_SYS_INFO command extensions */ +enum { + SMC_DIAG_SYS_INFO = 1, +}; + #define SMC_DIAG_MAX (__SMC_DIAG_MAX - 1) #define SMC_DIAG_EXT_MAX (__SMC_DIAG_EXT_MAX - 1) @@ -131,6 +137,17 @@ struct smc_diag_v2_lgr_info { __u8peer_hostname[SMC_MAX_HOSTNAME_LEN]; /* Peer host */ }; +struct smc_system_info { + __u8smc_version;/* SMC Version */ + __u8smc_release;/* SMC Release */ + __u8ueid_count; /* Number of UEIDs */ + __u8smc_ism_is_v2; /* Is ISM SMC v2 capable */ + __u32 reserved; /* Reserved for future use */ + __u8local_hostname[SMC_MAX_HOSTNAME_LEN]; /* Hostnames */ + __u8seid[SMC_MAX_EID_LEN]; /* System EID */ + 
__u8ueid[SMC_MAX_EID][SMC_MAX_EID_LEN]; /* User EIDs */ +}; + /* SMC_DIAG_LINKINFO */ struct smc_diag_linkinfo { diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 696d89c2dce4..e286dafd6e88 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -772,6 +772,11 @@ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact, return len > 0 ? 0 : len; } +void smc_clc_get_hostname(u8 **host) +{ + *host = &smc_hostname[0]; +} + void __init smc_clc_init(void) { struct new_utsname *u; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 49752c997c51..32d37f7b70f2 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -334,5 +334,6 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact, u8 version); void smc_clc_init(void) __init; +void smc_clc_get_hostname(u8 **host); #endif diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index f23f8f1d10d8..b79daa3cf0b0 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -235,6 +235,9 @@ static const struct smc_diag_ops smc_diag_ops = { .get_chid = smc_ism_get_chid, .get_ib_devices = smc_get_smc_ib_devices, .is_ib_port_active = smc_ib_port_active, + .get_system_eid = smc_ism_get_system_eid, + .get_hostname = smc_clc_get_hostname, + .is_v2_capable = smc_ism_is_v2_capable, }; const struct smc_diag_ops *smc_get_diag_ops(void) diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 6bf89bfe34bd..3536fa3e45af 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -27,6 +27,9 @@ struct smc_diag_ops { u16 (*get_chid)(struct smcd_dev *smcd); struct smc_ib_devices *(*get_ib_devices)(void); bool (*is_ib_port_active)(struct smc_ib_device *smcibdev, u8 ibport); + void (*get_system_eid)(struct smcd_dev *smcd, u8 **eid); + void (*get_hostname)(u8 **host); + bool (*is_v2_capable)(void); }; struct smc_lgr_list { /* list of link group definition */ diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 
3d5151919326..baa6c66aa320 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -671,6 +671,64 @@ static int smc_diag_prep_smcr_dev(struct smc_ib_devices *dev_list, return rc; } +static int smc_diag_prep_sys_info(struct smcd_dev_list *dev_list, +
[PATCH net-next v4 06/15] net/smc: Add diagnostic information to link structure
From: Guvenc Gulce During link creation add network and ib-device name to link structure. This is needed for diagnostic purposes. When diagnostic information is gathered, we need to traverse device, linkgroup and link structures. To be able to do that, we need to hold a spinlock for the linkgroup list. Without this diagnostic information in the link structure, another device list mutex would have to be held to dereference the device pointer in the link structure, which is impossible while already holding a spinlock. Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul --- net/smc/smc_core.c | 11 +++ net/smc/smc_core.h | 3 +++ 2 files changed, 14 insertions(+) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 24d55b5b352b..ca8b1644ba85 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -313,6 +313,16 @@ static u8 smcr_next_link_id(struct smc_link_group *lgr) return link_id; } +static void smcr_copy_dev_info_to_link(struct smc_link *link) +{ + struct smc_ib_device *smcibdev = link->smcibdev; + + snprintf(link->ibname, sizeof(link->ibname), "%s", +smcibdev->ibdev->name); + snprintf(link->ndevname, sizeof(link->ndevname), "%s", +smcibdev->netdev[link->ibport - 1]); +} + int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, u8 link_idx, struct smc_init_info *ini) { @@ -327,6 +337,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, lnk->smcibdev = ini->ib_dev; lnk->ibport = ini->ib_port; smc_ibdev_cnt_inc(lnk); + smcr_copy_dev_info_to_link(lnk); lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu; atomic_set(&lnk->conn_cnt, 0); smc_llc_link_set_uid(lnk); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 83a88a4635db..ee073a191d40 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -124,6 +124,9 @@ struct smc_link { u8 link_is_asym; /* is link asymmetric? 
*/ struct smc_link_group *lgr; /* parent link group */ struct work_struct link_down_wrk; /* wrk to bring link down */ + /* Diagnostic relevant link information */ + charibname[IB_DEVICE_NAME_MAX];/* ib device name */ + charndevname[IFNAMSIZ];/* network device name */ enum smc_link_state state; /* state of link */ struct delayed_work llc_testlink_wrk; /* testlink worker */ -- 2.17.1
[PATCH net-next v4 08/15] net/smc: Add ability to work with extended SMC netlink API
From: Guvenc Gulce The smc_diag module should be able to work with both the legacy and the extended netlink API. This is done by using the sequence field of the netlink message header. The sequence field is optional and was filled with a constant value MAGIC_SEQ in the current implementation. New constant values MAGIC_SEQ_V2 and MAGIC_SEQ_V2_ACK are used to signal the usage of the new Netlink API between userspace and kernel. Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul --- include/uapi/linux/smc_diag.h | 7 +++ net/smc/smc_diag.c| 21 + 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index 8cb3a6fef553..236c1c52d562 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -6,6 +6,13 @@ #include #include +/* Sequence numbers */ +enum { + MAGIC_SEQ = 123456, + MAGIC_SEQ_V2, + MAGIC_SEQ_V2_ACK, +}; + /* Request structure */ struct smc_diag_req { __u8diag_family; diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 44be723c97fe..bc2b616524ff 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -293,19 +293,24 @@ static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +static int smc_diag_dump_ext(struct sk_buff *skb, struct netlink_callback *cb) +{ + return skb->len; +} + static int smc_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) { struct net *net = sock_net(skb->sk); - + struct netlink_dump_control c = { + .min_dump_alloc = SKB_WITH_OVERHEAD(32768), + }; if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY && h->nlmsg_flags & NLM_F_DUMP) { - { - struct netlink_dump_control c = { - .dump = smc_diag_dump, - .min_dump_alloc = SKB_WITH_OVERHEAD(32768), - }; - return netlink_dump_start(net->diag_nlsk, skb, h, &c); - } + if (h->nlmsg_seq >= MAGIC_SEQ_V2) + c.dump = smc_diag_dump_ext; + else + c.dump = smc_diag_dump; + return netlink_dump_start(net->diag_nlsk, skb, h, &c); } return 0; } -- 2.17.1
[PATCH net-next v4 05/15] net/smc: Add diagnostic information to smc ib-device
From: Guvenc Gulce During smc ib-device creation, add network device name to smc ib-device structure. Register for netdevice name changes and update ib-device accordingly. This is needed for diagnostic purposes. Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul --- net/smc/smc_ib.c | 45 + net/smc/smc_ib.h | 2 ++ net/smc/smc_pnet.c | 3 +++ 3 files changed, 50 insertions(+) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 1c314dbdc7fa..300cca9296be 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -557,6 +557,50 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev) static struct ib_client smc_ib_client; +static void smc_copy_netdev_name(struct smc_ib_device *smcibdev, int port) +{ + struct ib_device *ibdev = smcibdev->ibdev; + struct net_device *ndev; + + if (!ibdev->ops.get_netdev) + return; + ndev = ibdev->ops.get_netdev(ibdev, port + 1); + if (ndev) { + snprintf(smcibdev->netdev[port], +sizeof(smcibdev->netdev[port]), +"%s", ndev->name); + dev_put(ndev); + } +} + +void smc_ib_ndev_name_change(struct net_device *ndev) +{ + struct smc_ib_device *smcibdev; + struct ib_device *libdev; + struct net_device *lndev; + u8 port_cnt; + int i; + + mutex_lock(&smc_ib_devices.mutex); + list_for_each_entry(smcibdev, &smc_ib_devices.list, list) { + port_cnt = smcibdev->ibdev->phys_port_cnt; + for (i = 0; i < min_t(size_t, port_cnt, SMC_MAX_PORTS); i++) { + libdev = smcibdev->ibdev; + if (!libdev->ops.get_netdev) + continue; + lndev = libdev->ops.get_netdev(libdev, i + 1); + if (lndev) + dev_put(lndev); + if (lndev != ndev) + continue; + snprintf(smcibdev->netdev[i], +sizeof(smcibdev->netdev[i]), +"%s", ndev->name); + } + } + mutex_unlock(&smc_ib_devices.mutex); +} + /* callback function for ib_register_client() */ static int smc_ib_add_dev(struct ib_device *ibdev) { @@ -596,6 +640,7 @@ static int smc_ib_add_dev(struct ib_device *ibdev) if (smc_pnetid_by_dev_port(ibdev->dev.parent, i, smcibdev->pnetid[i])) smc_pnetid_by_table_ib(smcibdev, i + 
1); + smc_copy_netdev_name(smcibdev, i); pr_warn_ratelimited("smc:ib device %s port %d has pnetid " "%.16s%s\n", smcibdev->ibdev->name, i + 1, diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index 3b85360a473b..5319496adea0 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -55,11 +55,13 @@ struct smc_ib_device { /* ib-device infos for smc */ struct mutexmutex; /* protect dev setup+cleanup */ atomic_tlnk_cnt_by_port[SMC_MAX_PORTS]; /* number of links per port */ + charnetdev[SMC_MAX_PORTS][IFNAMSIZ];/* ndev names */ }; struct smc_buf_desc; struct smc_link; +void smc_ib_ndev_name_change(struct net_device *ndev); int smc_ib_register_client(void) __init; void smc_ib_unregister_client(void); bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport); diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index f3c18b991d35..b0f40d73afd6 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -828,6 +828,9 @@ static int smc_pnet_netdev_event(struct notifier_block *this, case NETDEV_UNREGISTER: smc_pnet_remove_by_ndev(event_dev); return NOTIFY_OK; + case NETDEV_CHANGENAME: + smc_ib_ndev_name_change(event_dev); + return NOTIFY_OK; case NETDEV_REGISTER: smc_pnet_add_by_ndev(event_dev); return NOTIFY_OK; -- 2.17.1
[PATCH net-next v4 12/15] net/smc: Add support for obtaining SMCD device list
From: Guvenc Gulce Deliver SMCD device information via netlink based diagnostic interface. Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul --- include/uapi/linux/smc.h | 2 + include/uapi/linux/smc_diag.h | 20 net/smc/smc_core.h| 8 +++ net/smc/smc_diag.c| 95 +++ net/smc/smc_ib.h | 1 - 5 files changed, 125 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index 635e2c2aeac5..736e8b98c8a5 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -38,4 +38,6 @@ enum {/* SMC PNET Table commands */ #define SMC_LGR_ID_SIZE4 #define SMC_MAX_HOSTNAME_LEN 32 /* Max length of hostname */ #define SMC_MAX_EID_LEN32 /* Max length of eid */ +#define SMC_MAX_PORTS 2 /* Max # of ports per ib device */ +#define SMC_PCI_ID_STR_LEN 16 /* Max length of pci id string */ #endif /* _UAPI_LINUX_SMC_H */ diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index 5a80172df757..ab8f76bdd1a4 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -74,6 +74,7 @@ enum { /* V2 Commands */ enum { SMC_DIAG_GET_LGR_INFO = SMC_DIAG_EXTS_PER_CMD, + SMC_DIAG_GET_DEV_INFO, __SMC_DIAG_EXT_MAX, }; @@ -84,6 +85,11 @@ enum { SMC_DIAG_LGR_INFO_SMCD, }; +/* SMC_DIAG_GET_DEV_INFO command extensions */ +enum { + SMC_DIAG_DEV_INFO_SMCD = 1, +}; + #define SMC_DIAG_MAX (__SMC_DIAG_MAX - 1) #define SMC_DIAG_EXT_MAX (__SMC_DIAG_EXT_MAX - 1) @@ -164,6 +170,20 @@ struct smcd_diag_dmbinfo { /* SMC-D Socket internals */ struct smc_diag_v2_lgr_info v2_lgr_info; /* SMCv2 info */ }; +struct smc_diag_dev_info { + /* Pnet ID per device port */ + __u8pnet_id[SMC_MAX_PORTS][SMC_MAX_PNETID_LEN]; + /* whether pnetid is set by user */ + __u8pnetid_by_user[SMC_MAX_PORTS]; + __u32 use_cnt;/* Number of linkgroups */ + __u8is_critical;/* Is device critical */ + __u32 pci_fid;/* PCI FID */ + __u16 pci_pchid; /* PCI CHID */ + __u16 pci_vendor; /* PCI Vendor */ + __u16 pci_device; /* PCI Device Vendor ID */ + 
__u8pci_id[SMC_PCI_ID_STR_LEN]; /* PCI ID */ +}; + struct smc_diag_lgr { __u8lgr_id[SMC_LGR_ID_SIZE]; /* Linkgroup identifier */ __u8lgr_role; /* Linkgroup role */ diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index fb1f63f5e681..eec19a8e394c 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -373,6 +373,14 @@ static inline bool smc_link_active(struct smc_link *lnk) return lnk->state == SMC_LNK_ACTIVE; } +struct smc_pci_dev { + __u32 pci_fid; + __u16 pci_pchid; + __u16 pci_vendor; + __u16 pci_device; + __u8pci_id[SMC_PCI_ID_STR_LEN]; +}; + struct smc_sock; struct smc_clc_msg_accept_confirm; struct smc_clc_msg_local; diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index a644e2299dbc..6e7798dc57fb 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -35,6 +36,24 @@ static struct smc_diag_dump_ctx *smc_dump_context(struct netlink_callback *cb) return (struct smc_diag_dump_ctx *)cb->ctx; } +static void smc_set_pci_values(struct pci_dev *pci_dev, + struct smc_pci_dev *smc_dev) +{ + smc_dev->pci_vendor = pci_dev->vendor; + smc_dev->pci_device = pci_dev->device; + snprintf(smc_dev->pci_id, sizeof(smc_dev->pci_id), "%s", +pci_name(pci_dev)); +#if IS_ENABLED(CONFIG_S390) + { /* Set s390 specific PCI information */ + struct zpci_dev *zdev; + + zdev = to_zpci(pci_dev); + smc_dev->pci_fid = zdev->fid; + smc_dev->pci_pchid = zdev->pchid; + } +#endif +} + static void smc_gid_be16_convert(__u8 *buf, u8 *gid_raw) { sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x", @@ -450,6 +469,78 @@ static int smc_diag_fill_smcd_dev(struct smcd_dev_list *dev_list, return rc; } +static int smc_diag_handle_smcd_dev(struct smcd_dev *smcd, + struct sk_buff *skb, + struct netlink_callback *cb, + struct smc_diag_req_v2 *req) +{ + struct smc_diag_dev_info smc_diag_dev; + struct smc_pci_dev smc_pci_dev; + struct nlmsghdr *nlh; + int dummy = 0; + int rc = 0; + + nlh = nlmsg_put(skb, 
NETLINK_CB(cb->skb).portid, MAGIC_SEQ_V2_ACK, +
[PATCH net-next v4 03/15] net/smc: Add connection counters for links
From: Guvenc Gulce Add connection counters to the structure of the link. Increase/decrease the counters as needed in the corresponding routines. Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul --- net/smc/smc_core.c | 16 ++-- net/smc/smc_core.h | 1 + 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 2b19863f7171..323a4b396be0 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -139,6 +139,7 @@ static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first) } if (!conn->lnk) return SMC_CLC_DECL_NOACTLINK; + atomic_inc(&conn->lnk->conn_cnt); return 0; } @@ -180,6 +181,8 @@ static void __smc_lgr_unregister_conn(struct smc_connection *conn) struct smc_link_group *lgr = conn->lgr; rb_erase(&conn->alert_node, &lgr->conns_all); + if (conn->lnk) + atomic_dec(&conn->lnk->conn_cnt); lgr->conns_num--; conn->alert_token_local = 0; sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */ @@ -314,6 +317,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, lnk->smcibdev = ini->ib_dev; lnk->ibport = ini->ib_port; lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu; + atomic_set(&lnk->conn_cnt, 0); smc_llc_link_set_uid(lnk); INIT_WORK(&lnk->link_down_wrk, smc_link_down_work); if (!ini->ib_dev->initialized) { @@ -526,6 +530,14 @@ static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend, return rc; } +static void smc_switch_link_and_count(struct smc_connection *conn, + struct smc_link *to_lnk) +{ + atomic_dec(&conn->lnk->conn_cnt); + conn->lnk = to_lnk; + atomic_inc(&conn->lnk->conn_cnt); +} + struct smc_link *smc_switch_conns(struct smc_link_group *lgr, struct smc_link *from_lnk, bool is_dev_err) { @@ -574,7 +586,7 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr, smc->sk.sk_state == SMC_PEERABORTWAIT || smc->sk.sk_state == SMC_PROCESSABORT) { spin_lock_bh(&conn->send_lock); - conn->lnk = to_lnk; + 
smc_switch_link_and_count(conn, to_lnk); spin_unlock_bh(&conn->send_lock); continue; } @@ -588,7 +600,7 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr, } /* avoid race with smcr_tx_sndbuf_nonempty() */ spin_lock_bh(&conn->send_lock); - conn->lnk = to_lnk; + smc_switch_link_and_count(conn, to_lnk); rc = smc_switch_cursor(smc, pend, wr_buf); spin_unlock_bh(&conn->send_lock); sock_put(&smc->sk); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 9aee54a6bcba..83a88a4635db 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -129,6 +129,7 @@ struct smc_link { struct delayed_work llc_testlink_wrk; /* testlink worker */ struct completion llc_testlink_resp; /* wait for rx of testlink */ int llc_testlink_time; /* testlink interval */ + atomic_tconn_cnt; }; /* For now we just allow one parallel link per link group. The SMC protocol -- 2.17.1
Re: [PATCH v2 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter
On Mon, Nov 9, 2020 at 4:00 PM Zhang Qilong wrote: > > In many case, we need to check return value of pm_runtime_get_sync, but > it brings a trouble to the usage counter processing. Many callers forget > to decrease the usage counter when it failed. It has been discussed a > lot[0][1]. So we add a function to deal with the usage counter for better > coding. > > [0]https://lkml.org/lkml/2020/6/14/88 > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/20200520095148.10995-1-dinghao@zju.edu.cn/ > Signed-off-by: Zhang Qilong > --- > include/linux/pm_runtime.h | 30 ++ > 1 file changed, 30 insertions(+) > > diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h > index 4b708f4e8eed..6549ce764400 100644 > --- a/include/linux/pm_runtime.h > +++ b/include/linux/pm_runtime.h > @@ -386,6 +386,36 @@ static inline int pm_runtime_get_sync(struct device *dev) > return __pm_runtime_resume(dev, RPM_GET_PUT); > } > > +/** > + * pm_runtime_general_get - Bump up usage counter of a device and resume it. > + * @dev: Target device. > + * > + * Increase runtime PM usage counter of @dev first, and carry out > runtime-resume > + * of it synchronously. If __pm_runtime_resume return negative value(device > is in > + * error state), we to need decrease the usage counter before it return. If > + * __pm_runtime_resume return positive value, it means the runtime of device > has > + * already been in active state, and we let the new wrapper return zero > instead. > + * > + * The possible return values of this function is zero or negative value. > + * zero: > + *- it means resume succeeed or runtime of device has already been > active, the > + * runtime PM usage counter of @dev remains incremented. > + * negative: > + *- it means failure and the runtime PM usage counter of @dev has been > balanced. The kerneldoc above is kind of noisy and it is hard to figure out what the helper really does from it. 
You could basically say something like "Resume @dev synchronously and if that is successful, increment its runtime PM usage counter. Return 0 if the runtime PM usage counter of @dev has been incremented or a negative error code otherwise." > + */ > +static inline int pm_runtime_general_get(struct device *dev) What about pm_runtime_resume_and_get()? > +{ > + int ret = 0; This extra initialization is not necessary. You can initialize ret to the __pm_runtime_resume() return value right away. > + > + ret = __pm_runtime_resume(dev, RPM_GET_PUT); > + if (ret < 0) { > + pm_runtime_put_noidle(dev); > + return ret; > + } > + > + return 0; > +} > + > /** > * pm_runtime_put - Drop device usage counter and queue up "idle check" if 0. > * @dev: Target device. > --
Re: [PATCH net-next 05/18] rtnetlink: Add RTNH_F_TRAP flag
On Fri, Nov 06, 2020 at 11:12:21AM -0800, Jakub Kicinski wrote: > On Wed, 4 Nov 2020 15:30:27 +0200 Ido Schimmel wrote: > > *flags |= (nhc->nhc_flags & RTNH_F_ONLINK); > > if (nhc->nhc_flags & RTNH_F_OFFLOAD) > > *flags |= RTNH_F_OFFLOAD; > > + if (nhc->nhc_flags & RTNH_F_TRAP) > > + *flags |= RTNH_F_TRAP; > > Out of curiosity - why use this if construct like OFFLOAD rather than > the more concise mask like ONLINK does? Good question :) > In fact looks like the mask could just be extended there instead? Yes, good suggestion. Will do that.
[PATCH v5 7/8] can-dev: introduce helpers to access Classical CAN DLC values
can_get_len8_dlc: get value to fill len8_dlc at frame reception time can_get_cc_dlc: get DLC value to be written into CAN controller Signed-off-by: Oliver Hartkopp --- include/linux/can/dev.h | 19 +++ 1 file changed, 19 insertions(+) diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index e767a96ae075..f25558609d09 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -168,10 +168,29 @@ static inline bool can_is_canfd_skb(const struct sk_buff *skb) { /* the CAN specific type of skb is identified by its data length */ return skb->len == CANFD_MTU; } +/* helper to handle len8_dlc value for Classical CAN raw DLC access */ +static inline u8 can_check_len8_dlc(u32 ctrlmode, u8 len, u8 dlc, u8 ret) +{ + /* return value for len8_dlc only if all conditions apply */ + if ((ctrlmode & CAN_CTRLMODE_CC_LEN8_DLC) && + (len == CAN_MAX_DLEN) && + (dlc > CAN_MAX_DLEN && dlc <= CAN_MAX_RAW_DLC)) + ret = dlc; + + /* no valid len8_dlc value -> return provided default value */ + return ret; +} + +/* get value to fill len8_dlc in struct can_frame at frame reception time */ +#define can_get_len8_dlc(cm, len, dlc) can_check_len8_dlc(cm, len, dlc, 0) + +/* get DLC value to be written into Classical CAN controller at tx time */ +#define can_get_cc_dlc(cm, len, dlc) can_check_len8_dlc(cm, len, dlc, len) + /* helper to define static CAN controller features at device creation time */ static inline void can_set_static_ctrlmode(struct net_device *dev, u32 static_mode) { struct can_priv *priv = netdev_priv(dev); -- 2.28.0
[PATCH v5 5/8] can: rename CAN FD related can_len2dlc and can_dlc2len helpers
The helper functions can_len2dlc and can_dlc2len are only relevant for CAN FD data length code (DLC) conversion. To fit the introduced can_cc_dlc2len for Classical CAN we rename: can_dlc2len -> can_fd_dlc2len to get the payload length from the DLC can_len2dlc -> can_fd_len2dlc to get the DLC from the payload length Suggested-by: Vincent Mailhol Signed-off-by: Oliver Hartkopp --- Documentation/networking/can.rst | 2 +- drivers/net/can/dev.c | 8 drivers/net/can/flexcan.c | 4 ++-- drivers/net/can/ifi_canfd/ifi_canfd.c | 4 ++-- drivers/net/can/kvaser_pciefd.c | 6 +++--- drivers/net/can/m_can/m_can.c | 6 +++--- drivers/net/can/peak_canfd/peak_canfd.c | 4 ++-- drivers/net/can/rcar/rcar_canfd.c | 4 ++-- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c| 8 drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 6 +++--- drivers/net/can/usb/peak_usb/pcan_usb_fd.c| 4 ++-- drivers/net/can/xilinx_can.c | 4 ++-- include/linux/can/dev.h | 4 ++-- 13 files changed, 32 insertions(+), 32 deletions(-) diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst index ff05cbd05e0d..4895b0dd2714 100644 --- a/Documentation/networking/can.rst +++ b/Documentation/networking/can.rst @@ -1330,11 +1330,11 @@ payload. The representation of this length in can_frame.can_dlc and canfd_frame.len for userspace applications and inside the Linux network layer is a plain value from 0 .. 64 instead of the CAN 'data length code'. The data length code was a 1:1 mapping to the payload length in the legacy CAN frames anyway. The payload length to the bus-relevant DLC mapping is only performed inside the CAN drivers, preferably with the helper -functions can_dlc2len() and can_len2dlc(). +functions can_fd_dlc2len() and can_fd_len2dlc(). 
The CAN netdevice driver capabilities can be distinguished by the network devices maximum transfer unit (MTU):: MTU = 16 (CAN_MTU) => sizeof(struct can_frame) => 'legacy' CAN device diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 566501a02b91..7878544184b9 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -29,15 +29,15 @@ MODULE_AUTHOR("Wolfgang Grandegger "); static const u8 dlc2len[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 16, 20, 24, 32, 48, 64}; /* get data length from raw data length code (DLC) */ -u8 can_dlc2len(u8 dlc) +u8 can_fd_dlc2len(u8 dlc) { return dlc2len[dlc & 0x0F]; } -EXPORT_SYMBOL_GPL(can_dlc2len); +EXPORT_SYMBOL_GPL(can_fd_dlc2len); static const u8 len2dlc[] = {0, 1, 2, 3, 4, 5, 6, 7, 8,/* 0 - 8 */ 9, 9, 9, 9,/* 9 - 12 */ 10, 10, 10, 10,/* 13 - 16 */ 11, 11, 11, 11,/* 17 - 20 */ @@ -47,18 +47,18 @@ static const u8 len2dlc[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, /* 0 - 8 */ 14, 14, 14, 14, 14, 14, 14, 14,/* 41 - 48 */ 15, 15, 15, 15, 15, 15, 15, 15,/* 49 - 56 */ 15, 15, 15, 15, 15, 15, 15, 15}; /* 57 - 64 */ /* map the sanitized data length to an appropriate data length code */ -u8 can_len2dlc(u8 len) +u8 can_fd_len2dlc(u8 len) { if (unlikely(len > 64)) return 0xF; return len2dlc[len]; } -EXPORT_SYMBOL_GPL(can_len2dlc); +EXPORT_SYMBOL_GPL(can_fd_len2dlc); #ifdef CONFIG_CAN_CALC_BITTIMING #define CAN_CALC_MAX_ERROR 50 /* in one-tenth of a percent */ /* Bit-timing calculation derived from: diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index e76fb1500fa1..5542290d29f5 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -742,11 +742,11 @@ static netdev_tx_t flexcan_start_xmit(struct sk_buff *skb, struct net_device *de { const struct flexcan_priv *priv = netdev_priv(dev); struct canfd_frame *cfd = (struct canfd_frame *)skb->data; u32 can_id; u32 data; - u32 ctrl = FLEXCAN_MB_CODE_TX_DATA | ((can_len2dlc(cfd->len)) << 16); + u32 ctrl = FLEXCAN_MB_CODE_TX_DATA | ((can_fd_len2dlc(cfd->len)) << 
16); int i; if (can_dropped_invalid_skb(dev, skb)) return NETDEV_TX_OK; @@ -996,11 +996,11 @@ static struct sk_buff *flexcan_mailbox_read(struct can_rx_offload *offload, cfd->can_id = ((reg_id >> 0) & CAN_EFF_MASK) | CAN_EFF_FLAG; else cfd->can_id = (reg_id >> 18) & CAN_SFF_MASK; if (reg_ctrl & FLEXCAN_MB_CNT_EDL) { - cfd->len = can_dlc2len((reg_ctrl >> 16) & 0xf); + cfd->len = can_fd_dlc2len((reg_ctrl >> 16) & 0xf); if (reg_ctrl & FLEXCAN_MB_CNT_
[PATCH v5 3/8] can: remove obsolete get_canfd_dlc() macro
The macro was always used together with can_dlc2len() which sanitizes the given dlc value on its own. Signed-off-by: Oliver Hartkopp --- drivers/net/can/flexcan.c | 2 +- drivers/net/can/peak_canfd/peak_canfd.c | 2 +- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c| 2 +- drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 2 +- drivers/net/can/usb/peak_usb/pcan_usb_fd.c| 2 +- include/linux/can/dev.h | 1 - include/linux/can/dev/peak_canfd.h| 2 +- 7 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index d77276cd1d99..e76fb1500fa1 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -996,11 +996,11 @@ static struct sk_buff *flexcan_mailbox_read(struct can_rx_offload *offload, cfd->can_id = ((reg_id >> 0) & CAN_EFF_MASK) | CAN_EFF_FLAG; else cfd->can_id = (reg_id >> 18) & CAN_SFF_MASK; if (reg_ctrl & FLEXCAN_MB_CNT_EDL) { - cfd->len = can_dlc2len(get_canfd_dlc((reg_ctrl >> 16) & 0xf)); + cfd->len = can_dlc2len((reg_ctrl >> 16) & 0xf); if (reg_ctrl & FLEXCAN_MB_CNT_BRS) cfd->flags |= CANFD_BRS; } else { cfd->len = can_cc_dlc2len((reg_ctrl >> 16) & 0xf); diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c index 9ea2adea3f0f..c6077e07214e 100644 --- a/drivers/net/can/peak_canfd/peak_canfd.c +++ b/drivers/net/can/peak_canfd/peak_canfd.c @@ -255,11 +255,11 @@ static int pucan_handle_can_rx(struct peak_canfd_priv *priv, struct sk_buff *skb; const u16 rx_msg_flags = le16_to_cpu(msg->flags); u8 cf_len; if (rx_msg_flags & PUCAN_MSG_EXT_DATA_LEN) - cf_len = can_dlc2len(get_canfd_dlc(pucan_msg_get_dlc(msg))); + cf_len = can_dlc2len(pucan_msg_get_dlc(msg)); else cf_len = can_cc_dlc2len(pucan_msg_get_dlc(msg)); /* if this frame is an echo, */ if (rx_msg_flags & PUCAN_MSG_LOOPED_BACK) { diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index c0a08400f444..3bac7274ee5b 100644 --- 
a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1403,11 +1403,11 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv, if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_BRS) cfd->flags |= CANFD_BRS; dlc = FIELD_GET(MCP251XFD_OBJ_FLAGS_DLC, hw_rx_obj->flags); - cfd->len = can_dlc2len(get_canfd_dlc(dlc)); + cfd->len = can_dlc2len(dlc); } else { if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR) cfd->can_id |= CAN_RTR_FLAG; cfd->len = can_cc_dlc2len(FIELD_GET(MCP251XFD_OBJ_FLAGS_DLC, diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c index 399e9698ffeb..906a3a340131 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c @@ -1249,11 +1249,11 @@ static void kvaser_usb_hydra_rx_msg_ext(const struct kvaser_usb *dev, if (flags & KVASER_USB_HYDRA_CF_FLAG_OVERRUN) kvaser_usb_can_rx_over_error(priv->netdev); if (flags & KVASER_USB_HYDRA_CF_FLAG_FDF) { - cf->len = can_dlc2len(get_canfd_dlc(dlc)); + cf->len = can_dlc2len(dlc); if (flags & KVASER_USB_HYDRA_CF_FLAG_BRS) cf->flags |= CANFD_BRS; if (flags & KVASER_USB_HYDRA_CF_FLAG_ESI) cf->flags |= CANFD_ESI; } else { diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index 1f08dd22b3d5..1233ef20646a 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -490,11 +490,11 @@ static int pcan_usb_fd_decode_canmsg(struct pcan_usb_fd_if *usb_if, cfd->flags |= CANFD_BRS; if (rx_msg_flags & PUCAN_MSG_ERROR_STATE_IND) cfd->flags |= CANFD_ESI; - cfd->len = can_dlc2len(get_canfd_dlc(pucan_msg_get_dlc(rm))); + cfd->len = can_dlc2len(pucan_msg_get_dlc(rm)); } else { /* CAN 2.0 frame case */ skb = alloc_can_skb(netdev, (struct can_frame **)&cfd); if (!skb) return -ENOMEM; diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 
9bc84c6978ec..802606e36b58 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -103,11 +103,10 @@ static inline unsigned int can_bit_time(const struct can_bittiming *bt) * * To be us
[PATCH v5 6/8] can: update documentation for DLC usage in Classical CAN
The extension of struct can_frame with the len8_dlc element and the can_dlc naming issue required an update of the documentation. Additionally introduce the term 'Classical CAN' which has been established by CAN in Automation to separate the original CAN2.0 A/B from CAN FD. Updated some data structures and flags. Signed-off-by: Oliver Hartkopp --- Documentation/networking/can.rst | 68 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst index 4895b0dd2714..f8dae662e454 100644 --- a/Documentation/networking/can.rst +++ b/Documentation/networking/can.rst @@ -226,24 +226,40 @@ interface (which is different from TCP/IP due to different addressing the socket, you can read(2) and write(2) from/to the socket or use send(2), sendto(2), sendmsg(2) and the recv* counterpart operations on the socket as usual. There are also CAN specific socket options described below. -The basic CAN frame structure and the sockaddr structure are defined -in include/linux/can.h: +The Classical CAN frame structure (aka CAN 2.0B), the CAN FD frame structure +and the sockaddr structure are defined in include/linux/can.h: .. code-block:: C struct can_frame { canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */ -__u8can_dlc; /* frame payload length in byte (0 .. 8) */ +union { +/* CAN frame payload length in byte (0 .. CAN_MAX_DLEN) + * was previously named can_dlc so we need to carry that + * name for legacy support + */ +__u8 len; +__u8 can_dlc; /* deprecated */ +}; __u8__pad; /* padding */ __u8__res0; /* reserved / padding */ -__u8__res1; /* reserved / padding */ +__u8len8_dlc; /* optional DLC for 8 byte payload length (9 .. 15) */ __u8data[8] __attribute__((aligned(8))); }; +Remark: The len element contains the payload length in bytes and should be +used instead of can_dlc. 
The deprecated can_dlc was misleadingly named as +it always contained the plain payload length in bytes and not the so called +'data length code' (DLC). + +To pass the raw DLC from/to a Classical CAN network device the len8_dlc +element can contain values 9 .. 15 when the len element is 8 (the real +payload length for all DLC values greater or equal to 8). + The alignment of the (linear) payload data[] to a 64bit boundary allows the user to define their own structs and unions to easily access the CAN payload. There is no given byteorder on the CAN bus by default. A read(2) system call on a CAN_RAW socket transfers a struct can_frame to the user space. @@ -258,10 +274,27 @@ PF_PACKET socket, that also binds to a specific interface: int can_ifindex; union { /* transport protocol class address info (e.g. ISOTP) */ struct { canid_t rx_id, tx_id; } tp; +/* J1939 address information */ +struct { +/* 8 byte name when using dynamic addressing */ +__u64 name; + +/* pgn: + * 8 bit: PS in PDU2 case, else 0 + * 8 bit: PF + * 1 bit: DP + * 1 bit: reserved + */ +__u32 pgn; + +/* 1 byte address */ +__u8 addr; +} j1939; + /* reserved for future CAN protocols address information */ } can_addr; }; To determine the interface index an appropriate ioctl() has to @@ -369,11 +402,11 @@ bitrates for the arbitration phase and the payload phase of the CAN FD frame and up to 64 bytes of payload. This extended payload length breaks all the kernel interfaces (ABI) which heavily rely on the CAN frame with fixed eight bytes of payload (struct can_frame) like the CAN_RAW socket. Therefore e.g. the CAN_RAW socket supports a new socket option CAN_RAW_FD_FRAMES that switches the socket into a mode that allows the handling of CAN FD frames -and (legacy) CAN frames simultaneously (see :ref:`socketcan-rawfd`). +and Classical CAN frames simultaneously (see :ref:`socketcan-rawfd`). The struct canfd_frame is defined in include/linux/can.h: .. 
code-block:: C @@ -395,21 +428,21 @@ all structure elements can be used as-is - only the data[] becomes extended. When introducing the struct canfd_frame it turned out that the data length code (DLC) of the struct can_frame was used as a length information as the length and the DLC has a 1:1 mapping in the range of 0 .. 8. To preserve the easy handling of the length
[PATCH v5 8/8] can-dev: add len8_dlc support for various CAN USB adapters
Support the Classical CAN raw DLC functionality to send and receive DLC values from 9 .. 15 on various Classical CAN capable USB network drivers: - gs_usb - pcan_usb - pcan_usb_fd - usb_8dev Tested-by: Oliver Hartkopp Signed-off-by: Oliver Hartkopp --- drivers/net/can/usb/gs_usb.c | 8 ++-- drivers/net/can/usb/peak_usb/pcan_usb.c| 8 ++-- drivers/net/can/usb/peak_usb/pcan_usb_fd.c | 17 - drivers/net/can/usb/usb_8dev.c | 9 ++--- 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 940589667a7f..cc0c30a5 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -330,10 +330,13 @@ static void gs_usb_receive_bulk_callback(struct urb *urb) return; cf->can_id = hf->can_id; cf->len = can_cc_dlc2len(hf->len); + cf->len8_dlc = can_get_len8_dlc(dev->can.ctrlmode, cf->len, + hf->len); + memcpy(cf->data, hf->data, 8); /* ERROR frames tell us information about the controller */ if (hf->can_id & CAN_ERR_FLAG) gs_update_state(dev, cf); @@ -502,11 +505,12 @@ static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb, hf->channel = dev->channel; cf = (struct can_frame *)skb->data; hf->can_id = cf->can_id; - hf->len = cf->len; + hf->len = can_get_cc_dlc(dev->can.ctrlmode, cf->len, cf->len8_dlc); + memcpy(hf->data, cf->data, cf->len); usb_fill_bulk_urb(urb, dev->udev, usb_sndbulkpipe(dev->udev, GSUSB_ENDPOINT_OUT), hf, @@ -856,11 +860,11 @@ static struct gs_can *gs_make_candev(unsigned int channel, dev->can.state = CAN_STATE_STOPPED; dev->can.clock.freq = bt_const->fclk_can; dev->can.bittiming_const = &dev->bt_const; dev->can.do_set_bittiming = gs_usb_set_bittiming; - dev->can.ctrlmode_supported = 0; + dev->can.ctrlmode_supported = CAN_CTRLMODE_CC_LEN8_DLC; if (bt_const->feature & GS_CAN_FEATURE_LISTEN_ONLY) dev->can.ctrlmode_supported |= CAN_CTRLMODE_LISTENONLY; if (bt_const->feature & GS_CAN_FEATURE_LOOP_BACK) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c 
b/drivers/net/can/usb/peak_usb/pcan_usb.c index ec34f87cc02c..5a8dffacc24e 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -733,10 +733,12 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len) cf->can_id = le16_to_cpu(tmp16) >> 5; } cf->len = can_cc_dlc2len(rec_len); + cf->len8_dlc = can_get_len8_dlc(mc->pdev->dev.can.ctrlmode, cf->len, + rec_len); /* Only first packet timestamp is a word */ if (pcan_usb_decode_ts(mc, !mc->rec_ts_idx)) goto decode_failed; @@ -836,11 +838,12 @@ static int pcan_usb_encode_msg(struct peak_usb_device *dev, struct sk_buff *skb, obuf[1] = 1; pc = obuf + PCAN_USB_MSG_HEADER_LEN; /* status/len byte */ - *pc = cf->len; + *pc = can_get_cc_dlc(dev->can.ctrlmode, cf->len, cf->len8_dlc); + if (cf->can_id & CAN_RTR_FLAG) *pc |= PCAN_USB_STATUSLEN_RTR; /* can id */ if (cf->can_id & CAN_EFF_FLAG) { @@ -990,11 +993,12 @@ static const struct can_bittiming_const pcan_usb_const = { const struct peak_usb_adapter pcan_usb = { .name = "PCAN-USB", .device_id = PCAN_USB_PRODUCT_ID, .ctrl_count = 1, .ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY | - CAN_CTRLMODE_BERR_REPORTING, + CAN_CTRLMODE_BERR_REPORTING | + CAN_CTRLMODE_CC_LEN8_DLC, .clock = { .freq = PCAN_USB_CRYSTAL_HZ / 2 , }, .bittiming_const = &pcan_usb_const, diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index 761e78d8e647..8020071c9067 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -492,16 +492,21 @@ static int pcan_usb_fd_decode_canmsg(struct pcan_usb_fd_if *usb_if, if (rx_msg_flags & PUCAN_MSG_ERROR_STATE_IND) cfd->flags |= CANFD_ESI; cfd->len = can_fd_dlc2len(pucan_msg_get_dlc(rm)); } else { + struct can_frame *cf; + /* CAN 2.0 frame case */ skb = alloc_can_skb(netdev, (struct can_frame **)&cfd); if (!skb) return -ENOMEM; cfd->len = can_cc_dlc2len(pucan_msg_get_dlc(rm)); + 
cf = (struct can_fram
[PATCH v5 2/8] can: rename get_can_dlc() macro with can_cc_dlc2len()
The get_can_dlc() macro is used to ensure the payload length information of the Classical CAN frame to be max 8 bytes (the CAN_MAX_DLEN). Rename the macro and use the correct constant in preparation of the len/dlc cleanup for Classical CAN frames. Signed-off-by: Oliver Hartkopp --- drivers/net/can/at91_can.c| 2 +- drivers/net/can/c_can/c_can.c | 2 +- drivers/net/can/cc770/cc770.c | 2 +- drivers/net/can/flexcan.c | 2 +- drivers/net/can/grcan.c | 2 +- drivers/net/can/ifi_canfd/ifi_canfd.c | 2 +- drivers/net/can/janz-ican3.c | 4 ++-- drivers/net/can/m_can/m_can.c | 2 +- drivers/net/can/mscan/mscan.c | 2 +- drivers/net/can/pch_can.c | 4 ++-- drivers/net/can/peak_canfd/peak_canfd.c | 2 +- drivers/net/can/rcar/rcar_can.c | 2 +- drivers/net/can/rcar/rcar_canfd.c | 4 ++-- drivers/net/can/sja1000/sja1000.c | 2 +- drivers/net/can/softing/softing_main.c| 2 +- drivers/net/can/spi/hi311x.c | 2 +- drivers/net/can/spi/mcp251x.c | 4 ++-- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c| 2 +- drivers/net/can/sun4i_can.c | 2 +- drivers/net/can/ti_hecc.c | 2 +- drivers/net/can/usb/ems_usb.c | 2 +- drivers/net/can/usb/esd_usb2.c| 2 +- drivers/net/can/usb/gs_usb.c | 2 +- drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 4 ++-- drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c | 4 ++-- drivers/net/can/usb/mcba_usb.c| 2 +- drivers/net/can/usb/peak_usb/pcan_usb.c | 2 +- drivers/net/can/usb/peak_usb/pcan_usb_fd.c| 2 +- drivers/net/can/usb/ucan.c| 8 drivers/net/can/usb/usb_8dev.c| 2 +- drivers/net/can/xilinx_can.c | 4 ++-- include/linux/can/dev.h | 8 32 files changed, 45 insertions(+), 45 deletions(-) diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index c14de95d2ca7..db06254f8eb7 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -578,11 +578,11 @@ static void at91_read_mb(struct net_device *dev, unsigned int mb, cf->can_id = ((reg_mid >> 0) & CAN_EFF_MASK) | CAN_EFF_FLAG; else cf->can_id = (reg_mid >> 18) & CAN_SFF_MASK; reg_msr = 
at91_read(priv, AT91_MSR(mb)); - cf->can_dlc = get_can_dlc((reg_msr >> 16) & 0xf); + cf->can_dlc = can_cc_dlc2len((reg_msr >> 16) & 0xf); if (reg_msr & AT91_MSR_MRTR) cf->can_id |= CAN_RTR_FLAG; else { *(u32 *)(cf->data + 0) = at91_read(priv, AT91_MDL(mb)); diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 1ccdbe89585b..56cc705959ea 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -395,11 +395,11 @@ static int c_can_read_msg_object(struct net_device *dev, int iface, u32 ctrl) if (!skb) { stats->rx_dropped++; return -ENOMEM; } - frame->can_dlc = get_can_dlc(ctrl & 0x0F); + frame->can_dlc = can_cc_dlc2len(ctrl & 0x0F); arb = priv->read_reg32(priv, C_CAN_IFACE(ARB1_REG, iface)); if (arb & IF_ARB_MSGXTD) frame->can_id = (arb & CAN_EFF_MASK) | CAN_EFF_FLAG; diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c index 07e2b8df5153..3fd2a276dd93 100644 --- a/drivers/net/can/cc770/cc770.c +++ b/drivers/net/can/cc770/cc770.c @@ -484,11 +484,11 @@ static void cc770_rx(struct net_device *dev, unsigned int mo, u8 ctrl1) id |= cc770_read_reg(priv, msgobj[mo].id[0]) << 8; id >>= 5; } cf->can_id = id; - cf->can_dlc = get_can_dlc((config & 0xf0) >> 4); + cf->can_dlc = can_cc_dlc2len((config & 0xf0) >> 4); for (i = 0; i < cf->can_dlc; i++) cf->data[i] = cc770_read_reg(priv, msgobj[mo].data[i]); } stats->rx_packets++; diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 881799bd9c5e..d77276cd1d99 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1001,11 +1001,11 @@ static struct sk_buff *flexcan_mailbox_read(struct can_rx_offload *offload, cfd->len = can_dlc2len(get_canfd_dlc((reg_ctrl >> 16) & 0xf)); if (reg_ctrl & FLEXCAN_MB_CNT_BRS) cfd->flags |= CANFD_BRS; } else { - cfd->len = get_can_dlc((reg_ctrl >> 16) & 0xf); + cfd->len = can_cc_dlc2len((reg_ctrl >> 16) & 0xf);
[PATCH v5 1/8] can: add optional DLC element to Classical CAN frame structure
ISO 11898-1 Chapter 8.4.2.3 defines a 4 bit data length code (DLC) table which maps the DLC to the payload length of the CAN frame in bytes: DLC -> payload length 0 .. 8 -> 0 .. 8 9 .. 15 -> 8 Although the DLC values 8 .. 15 in Classical CAN always result in a payload length of 8 bytes these DLC values are transparently transmitted on the CAN bus. As the struct can_frame only provides a 'len' element (formerly 'can_dlc') which contains the plain payload length ( 0 .. 8 ) of the CAN frame, the raw DLC is not visible to the application programmer, e.g. for testing use-cases. To access the raw DLC values 9 .. 15 the len8_dlc element is introduced, which is only valid when the payload length 'len' is 8 and the DLC is greater than 8. The len8_dlc element is filled by the CAN interface driver and used for CAN frame creation by the CAN driver when the CAN_CTRLMODE_CC_LEN8_DLC flag is supported by the driver and enabled via netlink configuration interface. Reported-by: Vincent Mailhol Signed-off-by: Oliver Hartkopp --- include/uapi/linux/can.h | 38 include/uapi/linux/can/netlink.h | 1 + 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index 6a6d2c7655ff..f75238ac6dce 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -82,34 +82,44 @@ typedef __u32 canid_t; */ typedef __u32 can_err_mask_t; /* CAN payload length and DLC definitions according to ISO 11898-1 */ #define CAN_MAX_DLC 8 +#define CAN_MAX_RAW_DLC 15 #define CAN_MAX_DLEN 8 /* CAN FD payload length and DLC definitions according to ISO 11898-7 */ #define CANFD_MAX_DLC 15 #define CANFD_MAX_DLEN 64 /** - * struct can_frame - basic CAN frame structure - * @can_id: CAN ID of the frame and CAN_*_FLAG flags, see canid_t definition - * @can_dlc: frame payload length in byte (0 .. 8) aka data length code - * N.B. 
the DLC field from ISO 11898-1 Chapter 8.4.2.3 has a 1:1 - * mapping of the 'data length code' to the real payload length - * @__pad: padding - * @__res0: reserved / padding - * @__res1: reserved / padding - * @data:CAN frame payload (up to 8 byte) + * struct can_frame - Classical CAN frame structure (aka CAN 2.0B) + * @can_id: CAN ID of the frame and CAN_*_FLAG flags, see canid_t definition + * @len: CAN frame payload length in byte (0 .. 8) + * @can_dlc: deprecated name for CAN frame payload length in byte (0 .. 8) + * @__pad:padding + * @__res0: reserved / padding + * @len8_dlc: optional DLC value (9 .. 15) at 8 byte payload length + *len8_dlc contains values from 9 .. 15 when the payload length is + *8 bytes but the DLC value (see ISO 11898-1) is greater then 8. + *CAN_CTRLMODE_CC_LEN8_DLC flag has to be enabled in CAN driver. + * @data: CAN frame payload (up to 8 byte) */ struct can_frame { canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */ - __u8can_dlc; /* frame payload length in byte (0 .. CAN_MAX_DLEN) */ - __u8__pad; /* padding */ - __u8__res0; /* reserved / padding */ - __u8__res1; /* reserved / padding */ - __u8data[CAN_MAX_DLEN] __attribute__((aligned(8))); + union { + /* CAN frame payload length in byte (0 .. CAN_MAX_DLEN) +* was previously named can_dlc so we need to carry that +* name for legacy support +*/ + __u8 len; + __u8 can_dlc; /* deprecated */ + }; + __u8 __pad; /* padding */ + __u8 __res0; /* reserved / padding */ + __u8 len8_dlc; /* optional DLC for 8 byte payload length (9 .. 
15) */ + __u8 data[CAN_MAX_DLEN] __attribute__((aligned(8))); }; /* * defined bits for canfd_frame.flags * diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index 6f598b73839e..f730d443b918 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -98,10 +98,11 @@ struct can_ctrlmode { #define CAN_CTRLMODE_ONE_SHOT 0x08/* One-Shot mode */ #define CAN_CTRLMODE_BERR_REPORTING0x10/* Bus-error reporting */ #define CAN_CTRLMODE_FD0x20/* CAN FD mode */ #define CAN_CTRLMODE_PRESUME_ACK 0x40/* Ignore missing CAN ACKs */ #define CAN_CTRLMODE_FD_NON_ISO0x80/* CAN FD in non-ISO mode */ +#define CAN_CTRLMODE_CC_LEN8_DLC 0x100 /* Classic CAN DLC option */ /* * CAN device statistics */ struct can_device_stats { -- 2.28.0
[PATCH v5 0/8] Introduce optional DLC element for Classic CAN
Introduce improved DLC handling for Classic CAN which introduces a new element 'len8_dlc' to the struct can_frame and additionally rename the 'can_dlc' element to 'len' as it represents a plain payload length. Before implementing the CAN_CTRLMODE_CC_LEN8_DLC handling on driver level this patch set cleans up and renames the relevant code. No functional changes. This patch set is based on kernel/git/netdev/net-next.git Changes in v2: - rephrase commit message of patch 4 about can_dlc replacement Changes in v3: - remove unnecessarily introduced u8 cast in flexcan.c Changes in v4: - adopt phrasing suggestions from Vincent Mailhol - separate and extend CAN documentation (Documentation/networking/can.rst) - add new patches for len8_dlc handling for CAN drivers - add new helpers in include/linux/can/dev.h - add len8_dlc support for various CAN USB adapters as reference Changes in v5: - rename CAN FD related can_len2dlc and can_dlc2len helpers so that they fit to the renamed can_cc_dlc2len helper for Classical CAN (suggested by Vincent Mailhol) Oliver Hartkopp (8): can: add optional DLC element to Classical CAN frame structure can: rename get_can_dlc() macro with can_cc_dlc2len() can: remove obsolete get_canfd_dlc() macro can: replace can_dlc as variable/element for payload length can: rename CAN FD related can_len2dlc and can_dlc2len helpers can: update documentation for DLC usage in Classical CAN can-dev: introduce helpers to access Classical CAN DLC values can-dev: add len8_dlc support for various CAN USB adapters Documentation/networking/can.rst | 70 ++- drivers/net/can/at91_can.c| 14 ++-- drivers/net/can/c_can/c_can.c | 20 +++--- drivers/net/can/cc770/cc770.c | 14 ++-- drivers/net/can/dev.c | 16 ++--- drivers/net/can/flexcan.c | 6 +- drivers/net/can/grcan.c | 10 +-- drivers/net/can/ifi_canfd/ifi_canfd.c | 10 +-- drivers/net/can/janz-ican3.c | 20 +++--- drivers/net/can/kvaser_pciefd.c | 10 +-- drivers/net/can/m_can/m_can.c | 12 ++-- drivers/net/can/mscan/mscan.c | 20 
+++--- drivers/net/can/pch_can.c | 14 ++-- drivers/net/can/peak_canfd/peak_canfd.c | 16 ++--- drivers/net/can/rcar/rcar_can.c | 14 ++-- drivers/net/can/rcar/rcar_canfd.c | 12 ++-- drivers/net/can/rx-offload.c | 2 +- drivers/net/can/sja1000/sja1000.c | 10 +-- drivers/net/can/slcan.c | 32 - drivers/net/can/softing/softing_fw.c | 2 +- drivers/net/can/softing/softing_main.c| 14 ++-- drivers/net/can/spi/hi311x.c | 20 +++--- drivers/net/can/spi/mcp251x.c | 20 +++--- .../net/can/spi/mcp251xfd/mcp251xfd-core.c| 10 +-- drivers/net/can/sun4i_can.c | 10 +-- drivers/net/can/ti_hecc.c | 8 +-- drivers/net/can/usb/ems_usb.c | 16 ++--- drivers/net/can/usb/esd_usb2.c| 16 ++--- drivers/net/can/usb/gs_usb.c | 20 +++--- .../net/can/usb/kvaser_usb/kvaser_usb_core.c | 2 +- .../net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 24 +++ .../net/can/usb/kvaser_usb/kvaser_usb_leaf.c | 22 +++--- drivers/net/can/usb/mcba_usb.c| 10 +-- drivers/net/can/usb/peak_usb/pcan_usb.c | 20 +++--- drivers/net/can/usb/peak_usb/pcan_usb_fd.c| 29 +--- drivers/net/can/usb/peak_usb/pcan_usb_pro.c | 14 ++-- drivers/net/can/usb/ucan.c| 20 +++--- drivers/net/can/usb/usb_8dev.c| 21 +++--- drivers/net/can/xilinx_can.c | 16 ++--- include/linux/can/dev.h | 32 +++-- include/linux/can/dev/peak_canfd.h| 2 +- include/uapi/linux/can.h | 38 ++ include/uapi/linux/can/netlink.h | 1 + net/can/af_can.c | 2 +- net/can/gw.c | 2 +- net/can/j1939/main.c | 4 +- 46 files changed, 400 insertions(+), 317 deletions(-) -- 2.28.0
[PATCH v5 4/8] can: replace can_dlc as variable/element for payload length
The naming of can_dlc as element of struct can_frame and also as variable name is misleading as it claims to be a 'data length CODE' but in reality it always was a plain data length. With the introduction of a new 'len' element in struct can_frame we can now remove can_dlc as name and make clear which of the former uses was a plain length (-> 'len') or a data length code (-> 'dlc') value. Signed-off-by: Oliver Hartkopp --- drivers/net/can/at91_can.c| 14 drivers/net/can/c_can/c_can.c | 20 ++-- drivers/net/can/cc770/cc770.c | 14 drivers/net/can/dev.c | 10 +++--- drivers/net/can/grcan.c | 10 +++--- drivers/net/can/ifi_canfd/ifi_canfd.c | 4 +-- drivers/net/can/janz-ican3.c | 20 ++-- drivers/net/can/kvaser_pciefd.c | 4 +-- drivers/net/can/m_can/m_can.c | 4 +-- drivers/net/can/mscan/mscan.c | 20 ++-- drivers/net/can/pch_can.c | 12 +++ drivers/net/can/peak_canfd/peak_canfd.c | 12 +++ drivers/net/can/rcar/rcar_can.c | 14 drivers/net/can/rcar/rcar_canfd.c | 4 +-- drivers/net/can/rx-offload.c | 2 +- drivers/net/can/sja1000/sja1000.c | 10 +++--- drivers/net/can/slcan.c | 32 +-- drivers/net/can/softing/softing_fw.c | 2 +- drivers/net/can/softing/softing_main.c| 14 drivers/net/can/spi/hi311x.c | 20 ++-- drivers/net/can/spi/mcp251x.c | 18 +-- drivers/net/can/sun4i_can.c | 10 +++--- drivers/net/can/ti_hecc.c | 8 ++--- drivers/net/can/usb/ems_usb.c | 16 +- drivers/net/can/usb/esd_usb2.c| 16 +- drivers/net/can/usb/gs_usb.c | 14 .../net/can/usb/kvaser_usb/kvaser_usb_core.c | 2 +- .../net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 16 +- .../net/can/usb/kvaser_usb/kvaser_usb_leaf.c | 22 ++--- drivers/net/can/usb/mcba_usb.c| 10 +++--- drivers/net/can/usb/peak_usb/pcan_usb.c | 14 drivers/net/can/usb/peak_usb/pcan_usb_fd.c| 10 +++--- drivers/net/can/usb/peak_usb/pcan_usb_pro.c | 14 drivers/net/can/usb/ucan.c| 14 drivers/net/can/usb/usb_8dev.c| 14 drivers/net/can/xilinx_can.c | 10 +++--- include/linux/can/dev.h | 4 +-- net/can/af_can.c | 2 +- net/can/gw.c | 2 +- net/can/j1939/main.c | 4 +-- 
40 files changed, 231 insertions(+), 231 deletions(-) diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index db06254f8eb7..5284f0ab3b06 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -466,11 +466,11 @@ static netdev_tx_t at91_start_xmit(struct sk_buff *skb, struct net_device *dev) netdev_err(dev, "BUG! TX buffer full when queue awake!\n"); return NETDEV_TX_BUSY; } reg_mid = at91_can_id_to_reg_mid(cf->can_id); reg_mcr = ((cf->can_id & CAN_RTR_FLAG) ? AT91_MCR_MRTR : 0) | - (cf->can_dlc << 16) | AT91_MCR_MTCR; + (cf->len << 16) | AT91_MCR_MTCR; /* disable MB while writing ID (see datasheet) */ set_mb_mode(priv, mb, AT91_MB_MODE_DISABLED); at91_write(priv, AT91_MID(mb), reg_mid); set_mb_mode_prio(priv, mb, AT91_MB_MODE_TX, prio); @@ -479,11 +479,11 @@ static netdev_tx_t at91_start_xmit(struct sk_buff *skb, struct net_device *dev) at91_write(priv, AT91_MDH(mb), *(u32 *)(cf->data + 4)); /* This triggers transmission */ at91_write(priv, AT91_MCR(mb), reg_mcr); - stats->tx_bytes += cf->can_dlc; + stats->tx_bytes += cf->len; /* _NOTE_: subtract AT91_MB_TX_FIRST offset from mb! */ can_put_echo_skb(skb, dev, mb - get_mb_tx_first(priv)); /* @@ -552,11 +552,11 @@ static void at91_rx_overflow_err(struct net_device *dev) cf->can_id |= CAN_ERR_CRTL; cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW; stats->rx_packets++; - stats->rx_bytes += cf->can_dlc; + stats->rx_bytes += cf->len; netif_receive_skb(skb); } /** * at91_read_mb - read CAN msg from mailbox (lowlevel impl) @@ -578,11 +578,11 @@ static void at91_read_mb(struct net_device *dev, unsigned int mb, cf->can_id = ((reg_mid >> 0) & CAN_EFF_MASK) | CAN_EFF_FLAG; else cf->can_id = (reg_mid >> 18) & CAN_SFF_MASK; reg_msr = at91_read(priv, AT91_MSR(mb)); - cf->can_dlc = can_cc_dlc2len((reg_msr >> 16) & 0xf); + cf->len = can_cc_dlc2len((reg_msr >> 16) & 0x
Re: [PATCH v4 4/7] can: replace can_dlc as variable/element for payload length
Hi Vincent, On 09.11.20 13:59, Vincent MAILHOL wrote: On Mon. 9 Nov 2020 at 19:26, Oliver Hartkopp wrote: diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index b2e8df8e4cb0..72671184a7a2 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -183,12 +183,12 @@ static inline void can_set_static_ctrlmode(struct net_device *dev, /* override MTU which was set by default in can_setup()? */ if (static_mode & CAN_CTRLMODE_FD) dev->mtu = CANFD_MTU; } -/* get data length from can_dlc with sanitized can_dlc */ -u8 can_dlc2len(u8 can_dlc); +/* get data length from raw data length code (DLC) */ /* * convert a given data length code (dlc) of an FD CAN frame into a * valid data length of max. 64 bytes. */ I missed this point during my previous review: the can_dlc2len() function is only valid for CAN FD frames. Comments should reflect this fact. +u8 can_dlc2len(u8 dlc); Concerning the name: * can_get_cc_len() converts a Classical CAN frame DLC into a data length. * can_dlc2len() converts an FD CAN frame DLC into a data length. Just realized that both macro/function do similar things so we could think of a similar naming as well. * Example 1: can_get_cc_len() and can_get_fd_len() * Example 2: can_cc_dlc2len() and can_fd_dlc2len() I like! Patch set v5 is out now. Thanks, Oliver Or we could simply leave things as they are, this is not a big issue as long as the comments clearly state which one is for classical frames and which one is for FD frames. /* map the sanitized data length to an appropriate data length code */ u8 can_len2dlc(u8 len); can_len2dlc() might be renamed (e.g. can_get_fd_dlc()) if Example 1 solution is chosen. struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, Yours sincerely, Vincent Mailhol
Re: [PATCH] net: tcp: ratelimit warnings in tcp_recvmsg
On 11/9/20 3:48 PM, Menglong Dong wrote: > On Mon, Nov 9, 2020 at 9:36 PM Eric Dumazet wrote: >> >> I do not think this patch is useful. That is simply code churn. >> >> Can you trigger the WARN() in the latest upstream version ? >> If yes this is a serious bug that needs urgent attention. >> >> Make sure you have backported all needed fixes into your kernel, if >> you get this warning on a non pristine kernel. > > Theoretically, this WARN() shouldn't be triggered in any branches. > Somehow, it just happened in kernel v3.10. This really confused me. I > wasn't able to keep tracing it, as it is a product environment. > > I notice that the codes for tcp skb receiving didn't change much > between v3.10 and the latest upstream version, and guess the latest > version can be triggered too. > > If something is fixed and this WARN() won't be triggered, just ignore me. > Yes, I confirm this WARN() should not trigger. The bug is not in tcp recvmsg(), that is why you do not see obvious fix for this issue in 3.10
Re: [PATCH net-next 00/18] nexthop: Add support for nexthop objects offload
On Fri, Nov 06, 2020 at 11:31:59AM -0800, Jakub Kicinski wrote: > On Wed, 4 Nov 2020 15:30:22 +0200 Ido Schimmel wrote: > > From: Ido Schimmel > > > > This patch set adds support for nexthop objects offload with a dummy > > implementation over netdevsim. mlxsw support will be added later. > > > > The general idea is very similar to route offload in that notifications > > are sent whenever nexthop objects are changed. A listener can veto the > > change and the error will be communicated to user space with extack. > > > > To keep listeners as simple as possible, they not only receive > > notifications for the nexthop object that is changed, but also for all > > the other objects affected by this change. For example, when a single > > nexthop is replaced, a replace notification is sent for the single > > nexthop, but also for all the nexthop groups this nexthop is member in. > > This relieves listeners from the need to track such dependencies. > > > > To simplify things further for listeners, the notification info does not > > contain the raw nexthop data structures (e.g., 'struct nexthop'), but > > less complex data structures into which the raw data structures are > > parsed into. > > Applied, thank you! Great, thank you. And thanks David for the awesome work on the nexthop infrastructure. > > BTW no need to follow up on my else-after-return comment, > just something to keep in mind. Ack
Re: [PATCH] netfilter: conntrack: fix -Wformat
Hi Nick, I love your patch! Perhaps something to improve: [auto build test WARNING on nf-next/master] [also build test WARNING on nf/master ipvs/master v5.10-rc3 next-20201109] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Nick-Desaulniers/netfilter-conntrack-fix-Wformat/20201109-085104 base: https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git master config: riscv-randconfig-s031-20201109 (attached as .config) compiler: riscv32-linux-gcc (GCC) 9.3.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # apt-get install sparse # sparse version: v0.6.3-76-gf680124b-dirty # https://github.com/0day-ci/linux/commit/407a53117fa32f8f17a73a51bced0e85f168acb4 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Nick-Desaulniers/netfilter-conntrack-fix-Wformat/20201109-085104 git checkout 407a53117fa32f8f17a73a51bced0e85f168acb4 # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=riscv If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot "sparse warnings: (new ones prefixed by >>)" >> net/netfilter/nf_conntrack_standalone.c:56:29: sparse: sparse: cast to >> restricted __be16 net/netfilter/nf_conntrack_standalone.c:60:29: sparse: sparse: cast to restricted __be16 net/netfilter/nf_conntrack_standalone.c:61:29: sparse: sparse: cast to restricted __be16 net/netfilter/nf_conntrack_standalone.c:66:29: sparse: sparse: cast to restricted __be16 net/netfilter/nf_conntrack_standalone.c:67:29: sparse: sparse: cast to restricted __be16 net/netfilter/nf_conntrack_standalone.c:72:29: sparse: sparse: cast to restricted 
__be16 net/netfilter/nf_conntrack_standalone.c:73:29: sparse: sparse: cast to restricted __be16 net/netfilter/nf_conntrack_standalone.c:77:29: sparse: sparse: cast to restricted __be16 net/netfilter/nf_conntrack_standalone.c:78:29: sparse: sparse: cast to restricted __be16 net/netfilter/nf_conntrack_standalone.c:84:29: sparse: sparse: cast to restricted __be16 vim +56 net/netfilter/nf_conntrack_standalone.c 32 33 #ifdef CONFIG_NF_CONNTRACK_PROCFS 34 void 35 print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, 36 const struct nf_conntrack_l4proto *l4proto) 37 { 38 switch (tuple->src.l3num) { 39 case NFPROTO_IPV4: 40 seq_printf(s, "src=%pI4 dst=%pI4 ", 41 &tuple->src.u3.ip, &tuple->dst.u3.ip); 42 break; 43 case NFPROTO_IPV6: 44 seq_printf(s, "src=%pI6 dst=%pI6 ", 45 tuple->src.u3.ip6, tuple->dst.u3.ip6); 46 break; 47 default: 48 break; 49 } 50 51 switch (l4proto->l4proto) { 52 case IPPROTO_ICMP: 53 seq_printf(s, "type=%u code=%u id=%hu ", 54 tuple->dst.u.icmp.type, 55 tuple->dst.u.icmp.code, > 56 (__be16)ntohs(tuple->src.u.icmp.id)); 57 break; 58 case IPPROTO_TCP: 59 seq_printf(s, "sport=%hu dport=%hu ", 60 (__be16)ntohs(tuple->src.u.tcp.port), 61 (__be16)ntohs(tuple->dst.u.tcp.port)); 62 break; 63 case IPPROTO_UDPLITE: 64 case IPPROTO_UDP: 65 seq_printf(s, "sport=%hu dport=%hu ", 66 (__be16)ntohs(tuple->src.u.udp.port), 67 (__be16)ntohs(tuple->dst.u.udp.port)); 68 69 break; 70 case IPPROTO_DCCP: 71 seq_printf(s, "sport=%hu dport=%hu ", 72 (__be16)ntohs(tuple->src.u.dccp.port), 73 (__be16)ntohs(tuple->dst.u.dccp.port)); 74 break; 75 case IPPROTO_SCTP: 76 seq_printf(s, "sport=%hu dport=%hu ", 77 (__be16)ntohs(tuple->src.u.sctp.port), 78 (__be16)ntohs(tuple->dst.u.sctp.port)); 79
[PATCH net-next] net: phy: aquantia: do not return an error on clearing pending IRQs
From: Ioana Ciornei The referenced commit added code to .config_intr() which, upon configuration of the IRQ state, also clears any pending IRQ. If there were actually pending IRQs, a read on the IRQ status register will return something non-zero. This should not result in the callback returning an error. Fix this by returning an error only when the result of the phy_read_mmd() is negative. Fixes: e11ef96d44f1 ("net: phy: aquantia: remove the use of .ack_interrupt()") Signed-off-by: Ioana Ciornei --- drivers/net/phy/aquantia_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia_main.c index 345f70f9d39b..968dd43a2b1e 100644 --- a/drivers/net/phy/aquantia_main.c +++ b/drivers/net/phy/aquantia_main.c @@ -250,7 +250,7 @@ static int aqr_config_intr(struct phy_device *phydev) if (en) { /* Clear any pending interrupts before enabling them */ err = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_TX_VEND_INT_STATUS2); - if (err) + if (err < 0) return err; } @@ -273,7 +273,7 @@ static int aqr_config_intr(struct phy_device *phydev) if (!en) { /* Clear any pending interrupts after we have disabled them */ err = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_TX_VEND_INT_STATUS2); - if (err) + if (err < 0) return err; } -- 2.28.0
Re: [PATCH v5 net-next 3/3] net/sched: act_frag: add implicit packet fragment support.
On Mon 09 Nov 2020 at 16:50, Marcelo Ricardo Leitner wrote: > On Mon, Nov 09, 2020 at 03:24:37PM +0200, Vlad Buslov wrote: >> On Sun 08 Nov 2020 at 01:30, we...@ucloud.cn wrote: > ... >> > @@ -974,9 +974,22 @@ config NET_ACT_TUNNEL_KEY >> > To compile this code as a module, choose M here: the >> > module will be called act_tunnel_key. >> > >> > +config NET_ACT_FRAG >> > + tristate "Packet fragmentation" >> > + depends on NET_CLS_ACT >> > + help >> > + Say Y here to allow fragmenting big packets when outputting >> > + with the mirred action. >> > + >> > +If unsure, say N. >> > + >> > +To compile this code as a module, choose M here: the >> > +module will be called act_frag. >> > + >> >> Just wondering, what is the motivation for putting the frag code into >> standalone module? It doesn't implement usual act_* interface and is not >> user-configurable. To me it looks like functionality that belongs to >> act_api. Am I missing something? > > It's the way we found so far for not "polluting" mirred/tc with L3 > functionality, per Cong's feedbacks on previous attempts. As for why > not act_api, this is not some code that other actions can just re-use > and that file is already quite big, so I thought act_frag would be > better to keep it isolated/contained. Hmmm okay. > > If act_frag is confusing, then maybe act_mirred_frag? It is a mirred > plugin now, after all. Would be even more confusing to me since the act_frag module code is only directly accessed from act_ct and not act_mirred :) Anyway, I don't have a strong opinion regarding this. Just wanted to understand the motivation. > > ... >> > +int tcf_set_xmit_hook(int (*xmit_hook)(struct sk_buff *skb, >> > + int (*xmit)(struct sk_buff *skb))) >> > +{ >> > + if (!tcf_xmit_hook_enabled()) >> > + xchg(&tcf_xmit_hook, xmit_hook); >> >> Marcelo, why did you suggest to use atomic operations to change >> tcf_xmit_hook variable? It is not obvious to me after reading the code. 
> > I thought as a minimal way to not have problems on module removal, but > your comment below proves it is not right/enough. :-) > >> >> > + else if (xmit_hook != tcf_xmit_hook) >> > + return -EBUSY; >> > + >> > + tcf_inc_xmit_hook(); >> > + >> > + return 0; >> > +} >> > +EXPORT_SYMBOL_GPL(tcf_set_xmit_hook); >> > + >> > +void tcf_clear_xmit_hook(void) >> > +{ >> > + tcf_dec_xmit_hook(); >> > + >> > + if (!tcf_xmit_hook_enabled()) >> > + xchg(&tcf_xmit_hook, NULL); >> > +} >> > +EXPORT_SYMBOL_GPL(tcf_clear_xmit_hook); >> > + >> > +int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff >> > *skb)) >> > +{ >> > + if (tcf_xmit_hook_enabled()) >> >> Okay, so what happens here if tcf_xmit_hook is disabled concurrently? If >> we get here from some rule that doesn't involve act_ct but uses >> act_mirred and act_ct is concurrently removed decrementing last >> reference to static branch and setting tcf_xmit_hook to NULL? > > Yeah.. good point. Thinking further now, what about using RCU for the > hook? AFAICT it can cover the synchronization needed when clearing the > pointer, tcf_set_xmit_hook() should do a module_get() and > tcf_clear_xmit_hook() can delay a module_put(act_frag) as needed with > call_rcu. Wouldn't it be enough to just call synchronize_rcu() in tcf_clear_xmit_hook() after setting tcf_xmit_hook to NULL? act_ct module removal should be very rare, so synchronously waiting for rcu grace period to complete is probably okay. > > I see tcf_mirred_act is already calling rcu_dereference_bh(), so > it's already protected by rcu read here and calling tcf_xmit_hook() > with xmit pointer should be fine. WDYT? Yes, good idea. > >> >> > + return tcf_xmit_hook(skb, xmit); >> > + else >> > + return xmit(skb); >> > +} >> > +EXPORT_SYMBOL_GPL(tcf_dev_queue_xmit);
答复: [PATCH v2 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter
> operation to deal with usage counter > > On Mon, Nov 9, 2020 at 4:00 PM Zhang Qilong > wrote: > > > > In many case, we need to check return value of pm_runtime_get_sync, > > but it brings a trouble to the usage counter processing. Many callers > > forget to decrease the usage counter when it failed. It has been > > discussed a lot[0][1]. So we add a function to deal with the usage > > counter for better coding. > > > > [0]https://lkml.org/lkml/2020/6/14/88 > > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/202005200951 > > 48.10995-1-dinghao@zju.edu.cn/ > > Signed-off-by: Zhang Qilong > > --- > > include/linux/pm_runtime.h | 30 ++ > > 1 file changed, 30 insertions(+) > > > > diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h > > index 4b708f4e8eed..6549ce764400 100644 > > --- a/include/linux/pm_runtime.h > > +++ b/include/linux/pm_runtime.h > > @@ -386,6 +386,36 @@ static inline int pm_runtime_get_sync(struct device > *dev) > > return __pm_runtime_resume(dev, RPM_GET_PUT); } > > > > +/** > > + * pm_runtime_general_get - Bump up usage counter of a device and > resume it. > > + * @dev: Target device. > > + * > > + * Increase runtime PM usage counter of @dev first, and carry out > > +runtime-resume > > + * of it synchronously. If __pm_runtime_resume return negative > > +value(device is in > > + * error state), we to need decrease the usage counter before it > > +return. If > > + * __pm_runtime_resume return positive value, it means the runtime of > > +device has > > + * already been in active state, and we let the new wrapper return zero > instead. > > + * > > + * The possible return values of this function is zero or negative value. > > + * zero: > > + *- it means resume succeeed or runtime of device has already been > active, the > > + * runtime PM usage counter of @dev remains incremented. > > + * negative: > > + *- it means failure and the runtime PM usage counter of @dev has > been balanced. 
> > The kerneldoc above is kind of noisy and it is hard to figure out what the > helper > really does from it. > > You could basically say something like "Resume @dev synchronously and if that > is successful, increment its runtime PM usage counter. Return > 0 if the runtime PM usage counter of @dev has been incremented or a negative > error code otherwise." > How about the following description. /** 390 * pm_runtime_general_get - Bump up usage counter of a device and resume it. 391 * @dev: Target device. 392 * 393 * Increase runtime PM usage counter of @dev first, and carry out runtime-resume 394 * of it synchronously. If __pm_runtime_resume return negative value(device is in 395 * error state), we to need decrease the usage counter before it return. If 396 * __pm_runtime_resume return positive value, it means the runtime of device has 397 * already been in active state, and we let the new wrapper return zero instead. 398 * 399 * Resume @dev synchronously and if that is successful, and increment its runtime 400 * PM usage counter if it turn out to equal to 0. The runtime PM usage counter of 401 * @dev has been incremented or a negative error code otherwise. 402 */ Thanks, Zhang > > + */ > > +static inline int pm_runtime_general_get(struct device *dev) > > What about pm_runtime_resume_and_get()? > I think it's OK. > > +{ > > + int ret = 0; > > This extra initialization is not necessary. > > You can initialize ret to the __pm_runtime_resume() return value right away. > OK, good idea. > > + > > + ret = __pm_runtime_resume(dev, RPM_GET_PUT); > > + if (ret < 0) { > > + pm_runtime_put_noidle(dev); > > + return ret; > > + } > > + > > + return 0; > > +} > > + > > /** > > * pm_runtime_put - Drop device usage counter and queue up "idle check" > if 0. > > * @dev: Target device. > > --
Re: [PATCH v2 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter
On Mon, 9 Nov 2020 at 16:20, Rafael J. Wysocki wrote: > > On Mon, Nov 9, 2020 at 4:00 PM Zhang Qilong wrote: > > > > In many case, we need to check return value of pm_runtime_get_sync, but > > it brings a trouble to the usage counter processing. Many callers forget > > to decrease the usage counter when it failed. It has been discussed a > > lot[0][1]. So we add a function to deal with the usage counter for better > > coding. > > > > [0]https://lkml.org/lkml/2020/6/14/88 > > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/20200520095148.10995-1-dinghao@zju.edu.cn/ > > Signed-off-by: Zhang Qilong > > --- > > include/linux/pm_runtime.h | 30 ++ > > 1 file changed, 30 insertions(+) > > > > diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h > > index 4b708f4e8eed..6549ce764400 100644 > > --- a/include/linux/pm_runtime.h > > +++ b/include/linux/pm_runtime.h > > @@ -386,6 +386,36 @@ static inline int pm_runtime_get_sync(struct device > > *dev) > > return __pm_runtime_resume(dev, RPM_GET_PUT); > > } > > > > +/** > > + * pm_runtime_general_get - Bump up usage counter of a device and resume > > it. > > + * @dev: Target device. > > + * > > + * Increase runtime PM usage counter of @dev first, and carry out > > runtime-resume > > + * of it synchronously. If __pm_runtime_resume return negative > > value(device is in > > + * error state), we to need decrease the usage counter before it return. If > > + * __pm_runtime_resume return positive value, it means the runtime of > > device has > > + * already been in active state, and we let the new wrapper return zero > > instead. > > + * > > + * The possible return values of this function is zero or negative value. > > + * zero: > > + *- it means resume succeeed or runtime of device has already been > > active, the > > + * runtime PM usage counter of @dev remains incremented. > > + * negative: > > + *- it means failure and the runtime PM usage counter of @dev has been > > balanced. 
> > The kerneldoc above is kind of noisy and it is hard to figure out what > the helper really does from it. > > You could basically say something like "Resume @dev synchronously and > if that is successful, increment its runtime PM usage counter. Return > 0 if the runtime PM usage counter of @dev has been incremented or a > negative error code otherwise." > > > + */ > > +static inline int pm_runtime_general_get(struct device *dev) > > What about pm_runtime_resume_and_get()? We already have pm_runtime_get_if_active() - so perhaps pm_runtime_get_if_suspended() could be an option as well? > > > +{ > > + int ret = 0; > > This extra initialization is not necessary. > > You can initialize ret to the __pm_runtime_resume() return value right away. > > > + > > + ret = __pm_runtime_resume(dev, RPM_GET_PUT); > > + if (ret < 0) { > > + pm_runtime_put_noidle(dev); > > + return ret; > > + } > > + > > + return 0; > > +} > > + > > /** > > * pm_runtime_put - Drop device usage counter and queue up "idle check" if > > 0. > > * @dev: Target device. > > -- Kind regards Uffe
Re: [PATCH v5 net-next 3/3] net/sched: act_frag: add implicit packet fragment support.
On Mon 09 Nov 2020 at 16:54, wenxu wrote: > 在 2020/11/9 21:24, Vlad Buslov 写道: >> On Sun 08 Nov 2020 at 01:30, we...@ucloud.cn wrote: >>> From: wenxu >>> >>> Currently kernel tc subsystem can do conntrack in act_ct. But when several >>> fragment packets go through the act_ct, function tcf_ct_handle_fragments >>> will defrag the packets to a big one. But the last action will redirect >>> mirred to a device which maybe lead the reassembly big packet over the mtu >>> of target device. >>> >>> This patch add support for a xmit hook to mirred, that gets executed before >>> xmiting the packet. Then, when act_ct gets loaded, it configs that hook. >>> The frag xmit hook maybe reused by other modules. >>> >>> Signed-off-by: wenxu >>> --- >>> v2: Fix the crash for act_frag module without load >>> v3: modify the kconfig describe and put tcf_xmit_hook_is_enabled >>> in the tcf_dev_queue_xmit, and xchg atomic for tcf_xmit_hook >>> v4: using skb_protocol and fix line length exceeds 80 columns >>> v5: no change >>> >>> include/net/act_api.h | 16 + >>> net/sched/Kconfig | 13 >>> net/sched/Makefile | 1 + >>> net/sched/act_api.c| 51 +++ >>> net/sched/act_ct.c | 7 +++ >>> net/sched/act_frag.c | 164 >>> + >>> net/sched/act_mirred.c | 2 +- >>> 7 files changed, 253 insertions(+), 1 deletion(-) >>> create mode 100644 net/sched/act_frag.c >>> >>> diff --git a/include/net/act_api.h b/include/net/act_api.h >>> index 8721492..403a618 100644 >>> --- a/include/net/act_api.h >>> +++ b/include/net/act_api.h >>> @@ -239,6 +239,22 @@ int tcf_action_check_ctrlact(int action, struct >>> tcf_proto *tp, >>> struct netlink_ext_ack *newchain); >>> struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action, >>> struct tcf_chain *newchain); >>> + >>> +int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff >>> *skb)); >>> +int tcf_set_xmit_hook(int (*xmit_hook)(struct sk_buff *skb, >>> + int (*xmit)(struct sk_buff *skb))); >>> +void tcf_clear_xmit_hook(void); >>> + >>> +#if 
IS_ENABLED(CONFIG_NET_ACT_FRAG) >>> +int tcf_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff >>> *skb)); >>> +#else >>> +static inline int tcf_frag_xmit_hook(struct sk_buff *skb, >>> +int (*xmit)(struct sk_buff *skb)) >>> +{ >>> + return 0; >>> +} >>> +#endif >>> + >>> #endif /* CONFIG_NET_CLS_ACT */ >>> >>> static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, >>> diff --git a/net/sched/Kconfig b/net/sched/Kconfig >>> index a3b37d8..9a240c7 100644 >>> --- a/net/sched/Kconfig >>> +++ b/net/sched/Kconfig >>> @@ -974,9 +974,22 @@ config NET_ACT_TUNNEL_KEY >>> To compile this code as a module, choose M here: the >>> module will be called act_tunnel_key. >>> >>> +config NET_ACT_FRAG >>> + tristate "Packet fragmentation" >>> + depends on NET_CLS_ACT >>> + help >>> + Say Y here to allow fragmenting big packets when outputting >>> + with the mirred action. >>> + >>> + If unsure, say N. >>> + >>> + To compile this code as a module, choose M here: the >>> + module will be called act_frag. >>> + >> Just wondering, what is the motivation for putting the frag code into >> standalone module? It doesn't implement usual act_* interface and is not >> user-configurable. To me it looks like functionality that belongs to >> act_api. Am I missing something? > > The fragment operation is an single L3 action. > > So we put in an single modules. Maybe it is not proper to put in the act_api > directly. Okay. I have no strong opinion regarding this so it is better for maintainers to decide. > >>> config NET_ACT_CT >>> tristate "connection tracking tc action" >>> depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE >>> + depends on NET_ACT_FRAG >>> help >>> Say Y here to allow sending the packets to conntrack module. 
>>> >>> diff --git a/net/sched/Makefile b/net/sched/Makefile >>> index 66bbf9a..c146186 100644 >>> --- a/net/sched/Makefile >>> +++ b/net/sched/Makefile >>> @@ -29,6 +29,7 @@ obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o >>> obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o >>> obj-$(CONFIG_NET_IFE_SKBTCINDEX) += act_meta_skbtcindex.o >>> obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o >>> +obj-$(CONFIG_NET_ACT_FRAG) += act_frag.o >>> obj-$(CONFIG_NET_ACT_CT) += act_ct.o >>> obj-$(CONFIG_NET_ACT_GATE) += act_gate.o >>> obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o >>> diff --git a/net/sched/act_api.c b/net/sched/act_api.c >>> index f66417d..e7b501c 100644 >>> --- a/net/sched/act_api.c >>> +++ b/net/sched/act_api.c >>> @@ -22,6 +22,57 @@ >>> #include >>> #include >>> >>> +static int (*tcf_xmit_hook)(struct sk_buff *skb, >>> + int (*xmit)(struct
Re: [PATCH v4 5/7] can: update documentation for DLC usage in Classical CAN
On 09.11.20 15:50, Vincent MAILHOL wrote: On Mon. 9 Nov 2020 at 19:26, Oliver Hartkopp wrote: The extension of struct can_frame with the len8_dlc element and the can_dlc naming issue required an update of the documentation. Additionally introduce the term 'Classical CAN' which has been established by CAN in Automation to separate the original CAN2.0 A/B from CAN FD. Updated some data structures and flags. Signed-off-by: Oliver Hartkopp --- Documentation/networking/can.rst | 68 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst index ff05cbd05e0d..e17c6427bb3a 100644 --- a/Documentation/networking/can.rst +++ b/Documentation/networking/can.rst @@ -226,24 +226,40 @@ interface (which is different from TCP/IP due to different addressing the socket, you can read(2) and write(2) from/to the socket or use send(2), sendto(2), sendmsg(2) and the recv* counterpart operations on the socket as usual. There are also CAN specific socket options described below. -The basic CAN frame structure and the sockaddr structure are defined -in include/linux/can.h: +The Classical CAN frame structure (aka CAN 2.0B), the CAN FD frame structure +and the sockaddr structure are defined in include/linux/can.h: .. code-block:: C struct can_frame { canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */ -__u8can_dlc; /* frame payload length in byte (0 .. 8) */ +union { +/* CAN frame payload length in byte (0 .. CAN_MAX_DLEN) + * was previously named can_dlc so we need to carry that + * name for legacy support + */ +__u8 len; +__u8 can_dlc; /* deprecated */ +}; __u8__pad; /* padding */ __u8__res0; /* reserved / padding */ -__u8__res1; /* reserved / padding */ +__u8len8_dlc; /* optional DLC for 8 byte payload length (9 .. 15) */ __u8data[8] __attribute__((aligned(8))); }; +Remark: The len element contains the payload length in bytes and should be +used instead of can_dlc. 
The deprecated can_dlc was misleadingly named as +it always contained the plain payload length in bytes and not the so called +'data length code' (DLC). + +To pass the raw DLC from/to a Classical CAN network device the len8_dlc +element can contain values 9 .. 15 when the len element is 8 (the real +payload length for all DLC values greater or equal to 8). The "Classical CAN network device" part could make the reader misunderstand that FD capable controllers can not handle Classical CAN frames with DLC greater than 8. All the CAN-FD controllers I am aware of can emit both Classical and FD frames. On the contrary, some Classical CAN controllers might not support sending DLCs greater than 8. Propose to add the nuance that this depends on the device property: +To pass the raw DLC from/to a capable network device +(c.f. cc-len8-dlc CAN device property), the len8_dlc element can +contain values 9 .. 15 when the len element is 8 (the real payload +length for all DLC values greater or equal to 8). This section only describes the Classical CAN data structure. I also thought about it - but I did not want to overload it with device properties. + The alignment of the (linear) payload data[] to a 64bit boundary allows the user to define their own structs and unions to easily access the CAN payload. There is no given byteorder on the CAN bus by default. A read(2) system call on a CAN_RAW socket transfers a struct can_frame to the user space. @@ -258,10 +274,27 @@ PF_PACKET socket, that also binds to a specific interface: int can_ifindex; union { /* transport protocol class address info (e.g. 
ISOTP) */ struct { canid_t rx_id, tx_id; } tp; +/* J1939 address information */ +struct { +/* 8 byte name when using dynamic addressing */ +__u64 name; + +/* pgn: + * 8 bit: PS in PDU2 case, else 0 + * 8 bit: PF + * 1 bit: DP + * 1 bit: reserved + */ +__u32 pgn; + +/* 1 byte address */ +__u8 addr; +} j1939; + /* reserved for future CAN protocols address information */ } can_addr; }; This looks like some J1939 code. Did you mix your patches? This belongs to "update data structures" in the commit message ;-) To determine the interface index an appropriate ioctl() has
Re: [PATCH v2 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter
On Mon, Nov 9, 2020 at 4:50 PM Ulf Hansson wrote: > > On Mon, 9 Nov 2020 at 16:20, Rafael J. Wysocki wrote: > > > > On Mon, Nov 9, 2020 at 4:00 PM Zhang Qilong wrote: > > > > > > In many case, we need to check return value of pm_runtime_get_sync, but > > > it brings a trouble to the usage counter processing. Many callers forget > > > to decrease the usage counter when it failed. It has been discussed a > > > lot[0][1]. So we add a function to deal with the usage counter for better > > > coding. > > > > > > [0]https://lkml.org/lkml/2020/6/14/88 > > > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/20200520095148.10995-1-dinghao@zju.edu.cn/ > > > Signed-off-by: Zhang Qilong > > > --- > > > include/linux/pm_runtime.h | 30 ++ > > > 1 file changed, 30 insertions(+) > > > > > > diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h > > > index 4b708f4e8eed..6549ce764400 100644 > > > --- a/include/linux/pm_runtime.h > > > +++ b/include/linux/pm_runtime.h > > > @@ -386,6 +386,36 @@ static inline int pm_runtime_get_sync(struct device > > > *dev) > > > return __pm_runtime_resume(dev, RPM_GET_PUT); > > > } > > > > > > +/** > > > + * pm_runtime_general_get - Bump up usage counter of a device and resume > > > it. > > > + * @dev: Target device. > > > + * > > > + * Increase runtime PM usage counter of @dev first, and carry out > > > runtime-resume > > > + * of it synchronously. If __pm_runtime_resume return negative > > > value(device is in > > > + * error state), we to need decrease the usage counter before it return. > > > If > > > + * __pm_runtime_resume return positive value, it means the runtime of > > > device has > > > + * already been in active state, and we let the new wrapper return zero > > > instead. > > > + * > > > + * The possible return values of this function is zero or negative value. 
> > > + * zero: > > > + *- it means resume succeeed or runtime of device has already been > > > active, the > > > + * runtime PM usage counter of @dev remains incremented. > > > + * negative: > > > + *- it means failure and the runtime PM usage counter of @dev has > > > been balanced. > > > > The kerneldoc above is kind of noisy and it is hard to figure out what > > the helper really does from it. > > > > You could basically say something like "Resume @dev synchronously and > > if that is successful, increment its runtime PM usage counter. Return > > 0 if the runtime PM usage counter of @dev has been incremented or a > > negative error code otherwise." > > > > > + */ > > > +static inline int pm_runtime_general_get(struct device *dev) > > > > What about pm_runtime_resume_and_get()? > > We already have pm_runtime_get_if_active() - so perhaps > pm_runtime_get_if_suspended() could be an option as well? It doesn't work this way, though. The "get" happens even if the device has not been suspended.
Re: [PATCH v2 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter
On Mon, Nov 9, 2020 at 4:50 PM zhangqilong wrote: > > > operation to deal with usage counter > > > > On Mon, Nov 9, 2020 at 4:00 PM Zhang Qilong > > wrote: > > > > > > In many case, we need to check return value of pm_runtime_get_sync, > > > but it brings a trouble to the usage counter processing. Many callers > > > forget to decrease the usage counter when it failed. It has been > > > discussed a lot[0][1]. So we add a function to deal with the usage > > > counter for better coding. > > > > > > [0]https://lkml.org/lkml/2020/6/14/88 > > > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/202005200951 > > > 48.10995-1-dinghao@zju.edu.cn/ > > > Signed-off-by: Zhang Qilong > > > --- > > > include/linux/pm_runtime.h | 30 ++ > > > 1 file changed, 30 insertions(+) > > > > > > diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h > > > index 4b708f4e8eed..6549ce764400 100644 > > > --- a/include/linux/pm_runtime.h > > > +++ b/include/linux/pm_runtime.h > > > @@ -386,6 +386,36 @@ static inline int pm_runtime_get_sync(struct device > > *dev) > > > return __pm_runtime_resume(dev, RPM_GET_PUT); } > > > > > > +/** > > > + * pm_runtime_general_get - Bump up usage counter of a device and > > resume it. > > > + * @dev: Target device. > > > + * > > > + * Increase runtime PM usage counter of @dev first, and carry out > > > +runtime-resume > > > + * of it synchronously. If __pm_runtime_resume return negative > > > +value(device is in > > > + * error state), we to need decrease the usage counter before it > > > +return. If > > > + * __pm_runtime_resume return positive value, it means the runtime of > > > +device has > > > + * already been in active state, and we let the new wrapper return zero > > instead. > > > + * > > > + * The possible return values of this function is zero or negative value. > > > + * zero: > > > + *- it means resume succeeed or runtime of device has already been > > active, the > > > + * runtime PM usage counter of @dev remains incremented. 
> > > + * negative: > > > + *- it means failure and the runtime PM usage counter of @dev has > > been balanced. > > > > The kerneldoc above is kind of noisy and it is hard to figure out what the > > helper > > really does from it. > > > > You could basically say something like "Resume @dev synchronously and if > > that > > is successful, increment its runtime PM usage counter. Return > > 0 if the runtime PM usage counter of @dev has been incremented or a negative > > error code otherwise." > > > > How about the following description. > /** > 390 * pm_runtime_general_get - Bump up usage counter of a device and resume > it. > 391 * @dev: Target device. > 392 * > 393 * Increase runtime PM usage counter of @dev first, and carry out > runtime-resume > 394 * of it synchronously. If __pm_runtime_resume return negative > value(device is in > 395 * error state), we to need decrease the usage counter before it return. > If > 396 * __pm_runtime_resume return positive value, it means the runtime of > device has > 397 * already been in active state, and we let the new wrapper return zero > instead. > 398 * If you add the paragraph below, the one above becomes redundant IMV. > 399 * Resume @dev synchronously and if that is successful, and increment its > runtime "Resume @dev synchronously and if that is successful, increment its runtime" (drop the extra "and"). > 400 * PM usage counter if it turn out to equal to 0. The runtime PM usage > counter of The "if it turn out to equal to 0" phrase is redundant (and the grammar in it is incorrect). > 401 * @dev has been incremented or a negative error code otherwise. > 402 */ Why don't you use what I said verbatim?
[PATCH RFC] SUNRPC: Use zero-copy to perform socket send operations
Daire Byrne reports a ~50% aggregate throughput regression on his Linux NFS server after commit da1661b93bf4 ("SUNRPC: Teach server to use xprt_sock_sendmsg for socket sends"), which replaced kernel_send_page() calls in NFSD's socket send path with calls to sock_sendmsg() using iov_iter. Investigation showed that tcp_sendmsg() was not using zero-copy to send the xdr_buf's bvec pages, but instead was relying on memcpy. Set up the socket and each msghdr that bears bvec pages to use the zero-copy mechanism in tcp_sendmsg. Reported-by: Daire Byrne BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=209439 Fixes: da1661b93bf4 ("SUNRPC: Teach server to use xprt_sock_sendmsg for socket sends") Signed-off-by: Chuck Lever --- net/sunrpc/socklib.c |5 - net/sunrpc/svcsock.c |1 + net/sunrpc/xprtsock.c |1 + 3 files changed, 6 insertions(+), 1 deletion(-) This patch does not fully resolve the issue. Daire reports high softIRQ activity after the patch is applied, and this activity seems to prevent full restoration of previous performance. 
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index d52313af82bc..af47596a7bdd 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -226,9 +226,12 @@ static int xprt_send_pagedata(struct socket *sock, struct msghdr *msg, if (err < 0) return err; + msg->msg_flags |= MSG_ZEROCOPY; iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec, xdr_buf_pagecount(xdr), xdr->page_len + xdr->page_base); - return xprt_sendmsg(sock, msg, base + xdr->page_base); + err = xprt_sendmsg(sock, msg, base + xdr->page_base); + msg->msg_flags &= ~MSG_ZEROCOPY; + return err; } /* Common case: diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c2752e2b9ce3..c814b4953b15 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1176,6 +1176,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) svsk->sk_datalen = 0; memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages)); + sock_set_flag(sk, SOCK_ZEROCOPY); tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 7090bbee0ec5..343c6396b297 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2175,6 +2175,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) /* socket options */ sock_reset_flag(sk, SOCK_LINGER); + sock_set_flag(sk, SOCK_ZEROCOPY); tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; xprt_clear_connected(xprt);
Re: [PATCH v2 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter
On Mon, 9 Nov 2020 at 16:54, Rafael J. Wysocki wrote: > > On Mon, Nov 9, 2020 at 4:50 PM Ulf Hansson wrote: > > > > On Mon, 9 Nov 2020 at 16:20, Rafael J. Wysocki wrote: > > > > > > On Mon, Nov 9, 2020 at 4:00 PM Zhang Qilong > > > wrote: > > > > > > > > In many case, we need to check return value of pm_runtime_get_sync, but > > > > it brings a trouble to the usage counter processing. Many callers forget > > > > to decrease the usage counter when it failed. It has been discussed a > > > > lot[0][1]. So we add a function to deal with the usage counter for > > > > better > > > > coding. > > > > > > > > [0]https://lkml.org/lkml/2020/6/14/88 > > > > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/20200520095148.10995-1-dinghao@zju.edu.cn/ > > > > Signed-off-by: Zhang Qilong > > > > --- > > > > include/linux/pm_runtime.h | 30 ++ > > > > 1 file changed, 30 insertions(+) > > > > > > > > diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h > > > > index 4b708f4e8eed..6549ce764400 100644 > > > > --- a/include/linux/pm_runtime.h > > > > +++ b/include/linux/pm_runtime.h > > > > @@ -386,6 +386,36 @@ static inline int pm_runtime_get_sync(struct > > > > device *dev) > > > > return __pm_runtime_resume(dev, RPM_GET_PUT); > > > > } > > > > > > > > +/** > > > > + * pm_runtime_general_get - Bump up usage counter of a device and > > > > resume it. > > > > + * @dev: Target device. > > > > + * > > > > + * Increase runtime PM usage counter of @dev first, and carry out > > > > runtime-resume > > > > + * of it synchronously. If __pm_runtime_resume return negative > > > > value(device is in > > > > + * error state), we to need decrease the usage counter before it > > > > return. If > > > > + * __pm_runtime_resume return positive value, it means the runtime of > > > > device has > > > > + * already been in active state, and we let the new wrapper return > > > > zero instead. 
> > > > + * > > > > + * The possible return values of this function is zero or negative > > > > value. > > > > + * zero: > > > > + *- it means resume succeeed or runtime of device has already been > > > > active, the > > > > + * runtime PM usage counter of @dev remains incremented. > > > > + * negative: > > > > + *- it means failure and the runtime PM usage counter of @dev has > > > > been balanced. > > > > > > The kerneldoc above is kind of noisy and it is hard to figure out what > > > the helper really does from it. > > > > > > You could basically say something like "Resume @dev synchronously and > > > if that is successful, increment its runtime PM usage counter. Return > > > 0 if the runtime PM usage counter of @dev has been incremented or a > > > negative error code otherwise." > > > > > > > + */ > > > > +static inline int pm_runtime_general_get(struct device *dev) > > > > > > What about pm_runtime_resume_and_get()? > > > > We already have pm_runtime_get_if_active() - so perhaps > > pm_runtime_get_if_suspended() could be an option as well? > > It doesn't work this way, though. > > The "get" happens even if the device has not been suspended. Yes, that's right - so pm_runtime_resume_and_get() is probably the best we can pick then. Kind regards Uffe