[PATCH v9,net-next,08/12] crypto: octeontx2: add LF framework

2020-11-09 Thread Srujana Challa
CPT RVU Local Functions (LFs) need to be attached to the
PF/VF in order to submit instructions to CPT.
This patch adds the interface to initialize and attach
the LFs. It also adds an interface to register the LFs'
interrupts.

Signed-off-by: Suheil Chandran 
Signed-off-by: Lukasz Bartosik 
Signed-off-by: Srujana Challa 
---
 drivers/crypto/marvell/octeontx2/Makefile |   2 +-
 .../marvell/octeontx2/otx2_cpt_common.h   |   4 +
 .../marvell/octeontx2/otx2_cpt_mbox_common.c  |  56 +++
 drivers/crypto/marvell/octeontx2/otx2_cptlf.c | 429 ++
 drivers/crypto/marvell/octeontx2/otx2_cptlf.h | 283 
 drivers/crypto/marvell/octeontx2/otx2_cptpf.h |   2 +
 .../marvell/octeontx2/otx2_cptpf_mbox.c   |   8 +
 7 files changed, 783 insertions(+), 1 deletion(-)
 create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cptlf.c
 create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cptlf.h

diff --git a/drivers/crypto/marvell/octeontx2/Makefile 
b/drivers/crypto/marvell/octeontx2/Makefile
index 3c4155446296..e47a55961bb8 100644
--- a/drivers/crypto/marvell/octeontx2/Makefile
+++ b/drivers/crypto/marvell/octeontx2/Makefile
@@ -2,6 +2,6 @@
 obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += octeontx2-cpt.o
 
 octeontx2-cpt-objs := otx2_cptpf_main.o otx2_cptpf_mbox.o \
- otx2_cpt_mbox_common.o otx2_cptpf_ucode.o
+ otx2_cpt_mbox_common.o otx2_cptpf_ucode.o otx2_cptlf.o
 
 ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h 
b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
index ae16dc102459..d5576f5d3b90 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
@@ -76,4 +76,8 @@ int otx2_cpt_read_af_reg(struct otx2_mbox *mbox, struct 
pci_dev *pdev,
 u64 reg, u64 *val);
 int otx2_cpt_write_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev,
  u64 reg, u64 val);
+struct otx2_cptlfs_info;
+int otx2_cpt_attach_rscrs_msg(struct otx2_cptlfs_info *lfs);
+int otx2_cpt_detach_rsrcs_msg(struct otx2_cptlfs_info *lfs);
+
 #endif /* __OTX2_CPT_COMMON_H */
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c 
b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c
index ef1291c4881b..0933031ac827 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c
@@ -2,6 +2,7 @@
 /* Copyright (C) 2020 Marvell. */
 
 #include "otx2_cpt_common.h"
+#include "otx2_cptlf.h"
 
 int otx2_cpt_send_mbox_msg(struct otx2_mbox *mbox, struct pci_dev *pdev)
 {
@@ -112,3 +113,58 @@ int otx2_cpt_write_af_reg(struct otx2_mbox *mbox, struct 
pci_dev *pdev,
 
return otx2_cpt_send_mbox_msg(mbox, pdev);
 }
+
+int otx2_cpt_attach_rscrs_msg(struct otx2_cptlfs_info *lfs)
+{
+   struct otx2_mbox *mbox = lfs->mbox;
+   struct rsrc_attach *req;
+   int ret;
+
+   req = (struct rsrc_attach *)
+   otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req),
+   sizeof(struct msg_rsp));
+   if (req == NULL) {
+   dev_err(&lfs->pdev->dev, "RVU MBOX failed to get message.\n");
+   return -EFAULT;
+   }
+
+   req->hdr.id = MBOX_MSG_ATTACH_RESOURCES;
+   req->hdr.sig = OTX2_MBOX_REQ_SIG;
+   req->hdr.pcifunc = 0;
+   req->cptlfs = lfs->lfs_num;
+   ret = otx2_cpt_send_mbox_msg(mbox, lfs->pdev);
+   if (ret)
+   return ret;
+
+   if (!lfs->are_lfs_attached)
+   ret = -EINVAL;
+
+   return ret;
+}
+
+int otx2_cpt_detach_rsrcs_msg(struct otx2_cptlfs_info *lfs)
+{
+   struct otx2_mbox *mbox = lfs->mbox;
+   struct rsrc_detach *req;
+   int ret;
+
+   req = (struct rsrc_detach *)
+   otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req),
+   sizeof(struct msg_rsp));
+   if (req == NULL) {
+   dev_err(&lfs->pdev->dev, "RVU MBOX failed to get message.\n");
+   return -EFAULT;
+   }
+
+   req->hdr.id = MBOX_MSG_DETACH_RESOURCES;
+   req->hdr.sig = OTX2_MBOX_REQ_SIG;
+   req->hdr.pcifunc = 0;
+   ret = otx2_cpt_send_mbox_msg(mbox, lfs->pdev);
+   if (ret)
+   return ret;
+
+   if (lfs->are_lfs_attached)
+   ret = -EINVAL;
+
+   return ret;
+}
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.c 
b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c
new file mode 100644
index 000000000000..e27ea8909368
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c
@@ -0,0 +1,429 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2020 Marvell. */
+
+#include "otx2_cpt_common.h"
+#include "otx2_cptlf.h"
+#include "rvu_reg.h"
+
+#define CPT_TIMER_HOLD 0x03F
+#define CPT_COUNT_HOLD 32
+
+static void cptlf_do_set

[PATCH v9,net-next,11/12] crypto: octeontx2: add support to process the crypto request

2020-11-09 Thread Srujana Challa
Attach LFs to CPT VF to process the crypto requests and register
LF interrupts.

Signed-off-by: Suheil Chandran 
Signed-off-by: Lukasz Bartosik 
Signed-off-by: Srujana Challa 
---
 drivers/crypto/marvell/octeontx2/Makefile |   2 +-
 .../marvell/octeontx2/otx2_cpt_common.h   |   3 +
 .../marvell/octeontx2/otx2_cpt_reqmgr.h   | 145 +
 drivers/crypto/marvell/octeontx2/otx2_cptlf.h |   7 +
 .../marvell/octeontx2/otx2_cptvf_main.c   | 199 +++
 .../marvell/octeontx2/otx2_cptvf_mbox.c   |  26 +
 .../marvell/octeontx2/otx2_cptvf_reqmgr.c | 534 ++
 7 files changed, 915 insertions(+), 1 deletion(-)
 create mode 100644 drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c

diff --git a/drivers/crypto/marvell/octeontx2/Makefile 
b/drivers/crypto/marvell/octeontx2/Makefile
index ef6fb2ab3571..41c0a5832b3f 100644
--- a/drivers/crypto/marvell/octeontx2/Makefile
+++ b/drivers/crypto/marvell/octeontx2/Makefile
@@ -4,6 +4,6 @@ obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += octeontx2-cpt.o 
octeontx2-cptvf.o
 octeontx2-cpt-objs := otx2_cptpf_main.o otx2_cptpf_mbox.o \
  otx2_cpt_mbox_common.o otx2_cptpf_ucode.o otx2_cptlf.o
 octeontx2-cptvf-objs := otx2_cptvf_main.o otx2_cptvf_mbox.o otx2_cptlf.o \
-   otx2_cpt_mbox_common.o
+   otx2_cpt_mbox_common.o otx2_cptvf_reqmgr.o
 
 ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h 
b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
index ca220178e518..e41de466a3f7 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
@@ -17,6 +17,9 @@
 #define OTX2_CPT_MAX_VFS_NUM 128
 #define OTX2_CPT_RVU_FUNC_ADDR_S(blk, slot, offs) \
(((blk) << 20) | ((slot) << 12) | (offs))
+#define OTX2_CPT_RVU_PFFUNC(pf, func)  \
+   ((((pf) & RVU_PFVF_PF_MASK) << RVU_PFVF_PF_SHIFT) | \
+   (((func) & RVU_PFVF_FUNC_MASK) << RVU_PFVF_FUNC_SHIFT))
 
 #define OTX2_CPT_INVALID_CRYPTO_ENG_GRP 0xFF
 #define OTX2_CPT_NAME_LENGTH 64
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h 
b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h
index 9184f91c68c1..597a998c6df6 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h
@@ -10,6 +10,22 @@
 /* Completion code size and initial value */
 #define OTX2_CPT_COMPLETION_CODE_SIZE 8
 #define OTX2_CPT_COMPLETION_CODE_INIT OTX2_CPT_COMP_E_NOTDONE
+/*
+ * Maximum total number of SG buffers is 100, we divide it equally
+ * between input and output
+ */
+#define OTX2_CPT_MAX_SG_IN_CNT  50
+#define OTX2_CPT_MAX_SG_OUT_CNT 50
+
+/* DMA mode direct or SG */
+#define OTX2_CPT_DMA_MODE_DIRECT 0
+#define OTX2_CPT_DMA_MODE_SG 1
+
+/* Context source CPTR or DPTR */
+#define OTX2_CPT_FROM_CPTR 0
+#define OTX2_CPT_FROM_DPTR 1
+
+#define OTX2_CPT_MAX_REQ_SIZE 65535
 
 union otx2_cpt_opcode {
u16 flags;
@@ -19,6 +35,13 @@ union otx2_cpt_opcode {
} s;
 };
 
+struct otx2_cptvf_request {
+   u32 param1;
+   u32 param2;
+   u16 dlen;
+   union otx2_cpt_opcode opcode;
+};
+
 /*
  * CPT_INST_S software command definitions
  * Words EI (0-3)
@@ -48,4 +71,126 @@ struct otx2_cpt_iq_command {
union otx2_cpt_iq_cmd_word3 cptr;
 };
 
+struct otx2_cpt_pending_entry {
+   void *completion_addr;  /* Completion address */
+   void *info;
+   /* Kernel async request callback */
+   void (*callback)(int status, void *arg1, void *arg2);
+   struct crypto_async_request *areq; /* Async request callback arg */
+   u8 resume_sender;   /* Notify sender to resume sending requests */
+   u8 busy;/* Entry status (free/busy) */
+};
+
+struct otx2_cpt_pending_queue {
+   struct otx2_cpt_pending_entry *head; /* Head of the queue */
+   u32 front;  /* Process work from here */
+   u32 rear;   /* Append new work here */
+   u32 pending_count;  /* Pending requests count */
+   u32 qlen;   /* Queue length */
+   spinlock_t lock;/* Queue lock */
+};
+
+struct otx2_cpt_buf_ptr {
+   u8 *vptr;
+   dma_addr_t dma_addr;
+   u16 size;
+};
+
+union otx2_cpt_ctrl_info {
+   u32 flags;
+   struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+   u32 reserved_6_31:26;
+   u32 grp:3;  /* Group bits */
+   u32 dma_mode:2; /* DMA mode */
+   u32 se_req:1;   /* To SE core */
+#else
+   u32 se_req:1;   /* To SE core */
+   u32 dma_mode:2; /* DMA mode */
+   u32 grp:3;  /* Group bits */
+   u32 reserved_6_31:26;
+#endif
+   } s;
+};
+
+struct otx2_cpt_req_info {
+   /* Kernel async request callback */
+   void (*callback)(int status, void *arg1, void *arg2);
+   struct crypto_async_req

Re: [Linux-kernel-mentees] [PATCH net v2] Bluetooth: Fix slab-out-of-bounds read in hci_le_direct_adv_report_evt()

2020-11-09 Thread Marcel Holtmann
Hi Peilin,

> `num_reports` is not being properly checked. A malformed event packet with
> a large `num_reports` number makes hci_le_direct_adv_report_evt() read out
> of bounds. Fix it.
> 
> Cc: sta...@vger.kernel.org
> Fixes: 2f010b55884e ("Bluetooth: Add support for handling LE Direct 
> Advertising Report events")
> Reported-and-tested-by: syzbot+24ebd650e20bd263c...@syzkaller.appspotmail.com
> Link: https://syzkaller.appspot.com/bug?extid=24ebd650e20bd263ca01
> Signed-off-by: Peilin Ye 
> ---
> Change in v2:
>- add "Cc: stable@" tag.
> 
> net/bluetooth/hci_event.c | 12 +---
> 1 file changed, 5 insertions(+), 7 deletions(-)
> 
> diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
> index 4b7fc430793c..aec43ae488d1 100644
> --- a/net/bluetooth/hci_event.c
> +++ b/net/bluetooth/hci_event.c
> @@ -5863,21 +5863,19 @@ static void hci_le_direct_adv_report_evt(struct 
> hci_dev *hdev,
>struct sk_buff *skb)
> {
>   u8 num_reports = skb->data[0];
> - void *ptr = &skb->data[1];
> + struct hci_ev_le_direct_adv_info *ev = (void *)&skb->data[1];
> 
> - hci_dev_lock(hdev);
> + if (!num_reports || skb->len < num_reports * sizeof(*ev) + 1)
> + return;
> 
> - while (num_reports--) {
> - struct hci_ev_le_direct_adv_info *ev = ptr;
> + hci_dev_lock(hdev);
> 
> + for (; num_reports; num_reports--, ev++)
>   process_adv_report(hdev, ev->evt_type, &ev->bdaddr,
>  ev->bdaddr_type, &ev->direct_addr,
>  ev->direct_addr_type, ev->rssi, NULL, 0,
>  false);
> 
> - ptr += sizeof(*ev);
> - }
> -
>   hci_dev_unlock(hdev);
> }

patch has been applied to bluetooth-next tree.

Regards

Marcel



Re: [PATCH v7 1/5] Bluetooth: Interleave with allowlist scan

2020-11-09 Thread Marcel Holtmann
Hi Howard,

> This patch implements the interleaving between allowlist scan and
> no-filter scan. It'll be used to save power when at least one monitor is
> registered and at least one pending connection or one device to be
> scanned for.
> 
> The durations of the allowlist scan and the no-filter scan are
> controlled by MGMT command: Set Default System Configuration. The
> default values are set randomly for now.
> 
> Signed-off-by: Howard Chung 
> Reviewed-by: Alain Michaud 
> Reviewed-by: Manish Mandlik 
> ---
> 
> Changes in v7:
> - Fix bt_dev_warn argument type warning
> 
> Changes in v6:
> - Set parameter EnableAdvMonInterleaveScan to 1 byte long
> 
> Changes in v5:
> - Rename 'adv_monitor' from many functions/variables
> - Move __hci_update_interleaved_scan into hci_req_add_le_passive_scan
> - Update the logic of update_adv_monitor_scan_state
> 
> Changes in v4:
> - Rebase to bluetooth-next/master (previous 2 patches are applied)
> - Fix over 80 chars limit in mgmt_config.c
> - Set EnableAdvMonInterleaveScan default to Disable
> 
> Changes in v3:
> - Remove 'Bluez' prefix
> 
> Changes in v2:
> - remove 'case 0x001c' in mgmt_config.c
> 
> include/net/bluetooth/hci_core.h |  10 +++
> net/bluetooth/hci_core.c |   4 +
> net/bluetooth/hci_request.c  | 136 +--
> net/bluetooth/mgmt_config.c  |  10 +++
> 4 files changed, 153 insertions(+), 7 deletions(-)
> 
> diff --git a/include/net/bluetooth/hci_core.h 
> b/include/net/bluetooth/hci_core.h
> index 9873e1c8cd163..cfede18709d8f 100644
> --- a/include/net/bluetooth/hci_core.h
> +++ b/include/net/bluetooth/hci_core.h
> @@ -361,6 +361,8 @@ struct hci_dev {
>   __u8    ssp_debug_mode;
>   __u8    hw_error_code;
>   __u32   clock;
> + __u16   advmon_allowlist_duration;
> + __u16   advmon_no_filter_duration;
> 
>   __u16   devid_source;
>   __u16   devid_vendor;
> @@ -542,6 +544,14 @@ struct hci_dev {
>   struct delayed_work rpa_expired;
>   bdaddr_t rpa;
> 
> + enum {
> + INTERLEAVE_SCAN_NONE,
> + INTERLEAVE_SCAN_NO_FILTER,
> + INTERLEAVE_SCAN_ALLOWLIST
> + } interleave_scan_state;
> +
> + struct delayed_work interleave_scan;
> +
> #if IS_ENABLED(CONFIG_BT_LEDS)
>   struct led_trigger  *power_led;
> #endif
> diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
> index 502552d6e9aff..65b7b74baba4c 100644
> --- a/net/bluetooth/hci_core.c
> +++ b/net/bluetooth/hci_core.c
> @@ -3592,6 +3592,10 @@ struct hci_dev *hci_alloc_dev(void)
>   hdev->cur_adv_instance = 0x00;
>   hdev->adv_instance_timeout = 0;
> 
> + /* The default values will be chosen in the future */
> + hdev->advmon_allowlist_duration = 300;
> + hdev->advmon_no_filter_duration = 500;
> +
>   hdev->sniff_max_interval = 800;
>   hdev->sniff_min_interval = 80;
> 
> diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
> index 6f12bab4d2fa6..70ea126f56282 100644
> --- a/net/bluetooth/hci_request.c
> +++ b/net/bluetooth/hci_request.c
> @@ -378,6 +378,58 @@ void __hci_req_write_fast_connectable(struct hci_request 
> *req, bool enable)
>   hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type);
> }
> 
> +static void start_interleave_scan(struct hci_dev *hdev)
> +{
> + hdev->interleave_scan_state = INTERLEAVE_SCAN_NO_FILTER;
> + queue_delayed_work(hdev->req_workqueue,
> +&hdev->interleave_scan, 0);
> +}
> +
> +static bool is_interleave_scanning(struct hci_dev *hdev)
> +{
> + return hdev->interleave_scan_state != INTERLEAVE_SCAN_NONE;
> +}
> +
> +static void cancel_interleave_scan(struct hci_dev *hdev)
> +{
> + bt_dev_dbg(hdev, "%s cancelling interleave scan", hdev->name);
> +
> + cancel_delayed_work_sync(&hdev->interleave_scan);
> +
> + hdev->interleave_scan_state = INTERLEAVE_SCAN_NONE;
> +}
> +
> +/* Return true if interleave_scan wasn't started until exiting this function,
> + * otherwise, return false
> + */
> +static bool __hci_update_interleaved_scan(struct hci_dev *hdev)
> +{
> + if (hci_is_adv_monitoring(hdev) &&
> + !(list_empty(&hdev->pend_le_conns) &&
> +   list_empty(&hdev->pend_le_reports))) {
> + if (!is_interleave_scanning(hdev)) {

This extra indentation is rather useless here. Just do another &&.

> + /* If there is at least one ADV monitor and one pending
> +  * LE connection or one device to be scanned for, we
> +  * should alternate between an allowlist scan and one
> +  * without any filters to save power.
> +  */
> + start_interleave_scan(hdev);
> + bt_dev_dbg(hdev, "%s starting interleave scan",
> +hdev->name);
> + return true;
> + }

Re: [PATCH v7 2/5] Bluetooth: Handle system suspend resume case

2020-11-09 Thread Marcel Holtmann
Hi Howard,

> This patch adds code to handle the system suspension during interleave
> scan. The interleave scan will be canceled when the system is going to
> sleep, and will be restarted after waking up.
> 
> Commit-changes 5:
> - Remove the change in hci_req_config_le_suspend_scan

this does not belong here. So please avoid this in the future.

> Signed-off-by: Howard Chung 
> Reviewed-by: Alain Michaud 
> Reviewed-by: Manish Mandlik 
> Reviewed-by: Abhishek Pandit-Subedi 
> Reviewed-by: Miao-chen Chou 
> ---
> 
> (no changes since v1)
> 
> net/bluetooth/hci_request.c | 4 +++-
> 1 file changed, 3 insertions(+), 1 deletion(-)

Patch has been applied to bluetooth-next tree.

Regards

Marcel



Re: [PATCH v7 4/5] mgmt: Add supports of variable length parameter in mgmt_config

2020-11-09 Thread Marcel Holtmann
Hi Howard,

> This adds support of variable length parameter in mgmt_config.

I don’t see how this commit message describes the change correctly.

> 
> Signed-off-by: Howard Chung 
> ---
> 
> (no changes since v1)
> 
> net/bluetooth/mgmt_config.c | 140 +---
> 1 file changed, 84 insertions(+), 56 deletions(-)
> 
> diff --git a/net/bluetooth/mgmt_config.c b/net/bluetooth/mgmt_config.c
> index 2d3ad288c78ac..b735e59c7fd51 100644
> --- a/net/bluetooth/mgmt_config.c
> +++ b/net/bluetooth/mgmt_config.c
> @@ -11,72 +11,100 @@
> #include "mgmt_util.h"
> #include "mgmt_config.h"
> 
> -#define HDEV_PARAM_U16(_param_code_, _param_name_) \
> -{ \
> - { cpu_to_le16(_param_code_), sizeof(__u16) }, \
> - { cpu_to_le16(hdev->_param_name_) } \
> -}
> +#define HDEV_PARAM_U16(_param_name_) \
> + struct {\
> + struct mgmt_tlv entry; \
> + __le16 value; \
> + } __packed _param_name_
> 
> -#define HDEV_PARAM_U16_JIFFIES_TO_MSECS(_param_code_, _param_name_) \
> -{ \
> - { cpu_to_le16(_param_code_), sizeof(__u16) }, \
> - { cpu_to_le16(jiffies_to_msecs(hdev->_param_name_)) } \
> -}
> +#define TLV_SET_U16(_param_code_, _param_name_) \
> + { \
> + { cpu_to_le16(_param_code_), sizeof(__u16) }, \
> +   cpu_to_le16(hdev->_param_name_) \
> + }
> +
> +#define TLV_SET_U16_JIFFIES_TO_MSECS(_param_code_, _param_name_) \
> + { \
> + { cpu_to_le16(_param_code_), sizeof(__u16) }, \
> +   cpu_to_le16(jiffies_to_msecs(hdev->_param_name_)) \
> + }
> 
> int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
>  u16 data_len)
> {
> - struct {
> - struct mgmt_tlv entry;
> - union {
> - /* This is a simplification for now since all values
> -  * are 16 bits.  In the future, this code may need
> -  * refactoring to account for variable length values
> -  * and properly calculate the required buffer size.
> -  */
> - __le16 value;
> - };
> - } __packed params[] = {
> + int ret;
> + struct mgmt_rp_read_def_system_config {
>   /* Please see mgmt-api.txt for documentation of these values */
> - HDEV_PARAM_U16(0x0000, def_page_scan_type),
> - HDEV_PARAM_U16(0x0001, def_page_scan_int),
> - HDEV_PARAM_U16(0x0002, def_page_scan_window),
> - HDEV_PARAM_U16(0x0003, def_inq_scan_type),
> - HDEV_PARAM_U16(0x0004, def_inq_scan_int),
> - HDEV_PARAM_U16(0x0005, def_inq_scan_window),
> - HDEV_PARAM_U16(0x0006, def_br_lsto),
> - HDEV_PARAM_U16(0x0007, def_page_timeout),
> - HDEV_PARAM_U16(0x0008, sniff_min_interval),
> - HDEV_PARAM_U16(0x0009, sniff_max_interval),
> - HDEV_PARAM_U16(0x000a, le_adv_min_interval),
> - HDEV_PARAM_U16(0x000b, le_adv_max_interval),
> - HDEV_PARAM_U16(0x000c, def_multi_adv_rotation_duration),
> - HDEV_PARAM_U16(0x000d, le_scan_interval),
> - HDEV_PARAM_U16(0x000e, le_scan_window),
> - HDEV_PARAM_U16(0x000f, le_scan_int_suspend),
> - HDEV_PARAM_U16(0x0010, le_scan_window_suspend),
> - HDEV_PARAM_U16(0x0011, le_scan_int_discovery),
> - HDEV_PARAM_U16(0x0012, le_scan_window_discovery),
> - HDEV_PARAM_U16(0x0013, le_scan_int_adv_monitor),
> - HDEV_PARAM_U16(0x0014, le_scan_window_adv_monitor),
> - HDEV_PARAM_U16(0x0015, le_scan_int_connect),
> - HDEV_PARAM_U16(0x0016, le_scan_window_connect),
> - HDEV_PARAM_U16(0x0017, le_conn_min_interval),
> - HDEV_PARAM_U16(0x0018, le_conn_max_interval),
> - HDEV_PARAM_U16(0x0019, le_conn_latency),
> - HDEV_PARAM_U16(0x001a, le_supv_timeout),
> - HDEV_PARAM_U16_JIFFIES_TO_MSECS(0x001b,
> - def_le_autoconnect_timeout),
> - HDEV_PARAM_U16(0x001d, advmon_allowlist_duration),
> - HDEV_PARAM_U16(0x001e, advmon_no_filter_duration),
> + HDEV_PARAM_U16(def_page_scan_type);
> + HDEV_PARAM_U16(def_page_scan_int);
> + HDEV_PARAM_U16(def_page_scan_window);
> + HDEV_PARAM_U16(def_inq_scan_type);
> + HDEV_PARAM_U16(def_inq_scan_int);
> + HDEV_PARAM_U16(def_inq_scan_window);
> + HDEV_PARAM_U16(def_br_lsto);
> + HDEV_PARAM_U16(def_page_timeout);
> + HDEV_PARAM_U16(sniff_min_interval);
> + HDEV_PARAM_U16(sniff_max_interval);
> + HDEV_PARAM_U16(le_adv_min_interval);
> + HDEV_PARAM_U16(le_adv_max_interval);
> + HDEV_PARAM_U16(def_multi_adv_rotation_duration);
> + HDEV_PARAM_U16(le_scan_interval);
>

[PATCH ethtool 0/2] netlink: data lifetime error fixes

2020-11-09 Thread Michal Kubecek
Fixes of two data lifetime bugs found by testing with valgrind: one use
after free, one memory leak.

Michal Kubecek (2):
  netlink: fix use after free in netlink_run_handler()
  netlink: fix leaked instances of struct nl_socket

 netlink/netlink.c | 21 +++--
 netlink/nlsock.c  |  3 +++
 2 files changed, 18 insertions(+), 6 deletions(-)

-- 
2.29.2



[PATCH ethtool 1/2] netlink: fix use after free in netlink_run_handler()

2020-11-09 Thread Michal Kubecek
Valgrind detected use after free in netlink_run_handler(): some members of
struct nl_context are accessed after the netlink context is freed by
netlink_done(). Use local variables to store the two flags and check them
instead.

Fixes: 6c19c0d559c8 ("netlink: use genetlink ops information to decide about 
fallback")
Signed-off-by: Michal Kubecek 
---
 netlink/netlink.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/netlink/netlink.c b/netlink/netlink.c
index f655f6ea25b7..bdd3048e 100644
--- a/netlink/netlink.c
+++ b/netlink/netlink.c
@@ -457,6 +457,7 @@ void netlink_run_handler(struct cmd_context *ctx, nl_func_t 
nlfunc,
 bool no_fallback)
 {
bool wildcard = ctx->devname && !strcmp(ctx->devname, WILDCARD_DEVNAME);
+   bool wildcard_unsupported, ioctl_fallback;
struct nl_context *nlctx;
const char *reason;
int ret;
@@ -478,14 +479,17 @@ void netlink_run_handler(struct cmd_context *ctx, 
nl_func_t nlfunc,
nlctx = ctx->nlctx;
 
ret = nlfunc(ctx);
+   wildcard_unsupported = nlctx->wildcard_unsupported;
+   ioctl_fallback = nlctx->ioctl_fallback;
netlink_done(ctx);
-   if (no_fallback || ret != -EOPNOTSUPP || !nlctx->ioctl_fallback) {
-   if (nlctx->wildcard_unsupported)
+
+   if (no_fallback || ret != -EOPNOTSUPP || !ioctl_fallback) {
+   if (wildcard_unsupported)
fprintf(stderr, "%s\n",
"subcommand does not support wildcard dump");
exit(ret >= 0 ? ret : 1);
}
-   if (nlctx->wildcard_unsupported)
+   if (wildcard_unsupported)
reason = "subcommand does not support wildcard dump";
else
reason = "kernel netlink support for subcommand missing";
-- 
2.29.2



[PATCH ethtool 2/2] netlink: fix leaked instances of struct nl_socket

2020-11-09 Thread Michal Kubecek
Valgrind detected memory leaks caused by missing cleanup of netlink
context's ethnl_socket, ethnl2_socket and rtnl_socket. Also, contrary to
its description, nlsock_done() does not free struct nl_socket itself.
Fix nlsock_done() to free the structure and use it to dispose of sockets
pointed to by struct nl_context members.

Fixes: 50efb3cdd2bb ("netlink: netlink socket wrapper and helpers")
Fixes: 87307c30724d ("netlink: initialize ethtool netlink socket")
Fixes: 7f3585b22a4b ("netlink: add handler for permaddr (-P)")
Signed-off-by: Michal Kubecek 
---
 netlink/netlink.c | 11 ---
 netlink/nlsock.c  |  3 +++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/netlink/netlink.c b/netlink/netlink.c
index bdd3048e..ffe06339f099 100644
--- a/netlink/netlink.c
+++ b/netlink/netlink.c
@@ -435,11 +435,16 @@ out_free:
 
 static void netlink_done(struct cmd_context *ctx)
 {
-   if (!ctx->nlctx)
+   struct nl_context *nlctx = ctx->nlctx;
+
+   if (!nlctx)
return;
 
-   free(ctx->nlctx->ops_info);
-   free(ctx->nlctx);
+   nlsock_done(nlctx->ethnl_socket);
+   nlsock_done(nlctx->ethnl2_socket);
+   nlsock_done(nlctx->rtnl_socket);
+   free(nlctx->ops_info);
+   free(nlctx);
ctx->nlctx = NULL;
cleanup_all_strings();
 }
diff --git a/netlink/nlsock.c b/netlink/nlsock.c
index ef31d8c33b29..0ec2738d81d2 100644
--- a/netlink/nlsock.c
+++ b/netlink/nlsock.c
@@ -395,8 +395,11 @@ out_msgbuff:
  */
 void nlsock_done(struct nl_socket *nlsk)
 {
+   if (!nlsk)
+   return;
if (nlsk->sk)
mnl_socket_close(nlsk->sk);
msgbuff_done(&nlsk->msgbuff);
memset(nlsk, '\0', sizeof(*nlsk));
+   free(nlsk);
 }
-- 
2.29.2



Re: [RFC PATCH net-next 3/3] net: dsa: listen for SWITCHDEV_{FDB,DEL}_ADD_TO_DEVICE on foreign bridge neighbors

2020-11-09 Thread Vladimir Oltean
On Mon, Nov 09, 2020 at 12:05:19PM +0100, Tobias Waldekranz wrote:
> On Mon, Nov 09, 2020 at 12:03, Vladimir Oltean  wrote:
> > On Mon, Nov 09, 2020 at 09:09:37AM +0100, Tobias Waldekranz wrote:
> >> one. But now you have also increased the background load of an already
> >> choked resource, the MDIO bus.
> >
> > In practice, DSA switches are already very demanding of their management
> > interface throughput, for PTP and things like that. I do expect that if
> > you spent any significant amount of time with DSA, you already know the
> > ins and outs of your MDIO/SPI/I2C controller and it would already be
> > optimized for efficiency. But ok, we can add this to the list of cons.
>
> You are arguing for my position though, no? Yes it is demanding; that is
> why we must allocate it carefully.

Yes, if the change brings additional load to the MDIO/SPI/I2C link and
doesn't bring any benefit, then it makes sense to skip it.

> > So there you have it, it's not that bad. More work needs to be done, but
> > IMO it's still workable.
>
> If you bypass learning on all frames sent from the CPU (as today), yes I
> agree that you should be able to solve it with static entries. But I
> think that you will have lots of weird problems with initial packet loss
> as the FDB updates are not synchronous with the packet flow. I.e. the
> bridge will tell DSA to update the entry, but the update in HW will
> occur some time later when the workqueue actually performs the
> operation.

I don't know how bad this is in practice. It's surely better than
waiting 5 minutes though.

> > But now maybe it makes more sense to treat the switches that perform
> > hardware SA learning on the CPU port separately, after I've digested
> > this a bit.
>
> Yes, please. Because it will be impossible to add tx forward offloading
> otherwise.

Ok, so this change, when applied to mv88e6xxx, would preclude you from
using FORWARD frames for your other application of that feature, unless
you explicitly turn off SA learning for FORWARD frames coming from the CPU
port, in which case you would still be ok.

I need to sit on this for a while. How many DSA drivers do we have that
don't do SA learning in hardware for CPU-injected packets? ocelot/felix
and mv88e6xxx? Who else? Because if there aren't that many (or any at
all except for these two), then I could try to spend some time and see
how Felix behaves when I send FORWARD frames to it. Then we could go on
full blast with the other alternative, to force-enable address learning
from the CPU port, and declare this one as too complicated and not worth
the effort.


Re: [PATCH] Bluetooth: Resume advertising after LE connection

2020-11-09 Thread Marcel Holtmann
Hi Daniel,

> When an LE connection request is made, advertising is disabled and never
> resumed. When a client has an active advertisement, this is disruptive.
> This change adds resume logic for client-configured (non-directed)
> advertisements after the connection attempt.
> 
> The patch was tested by registering an advertisement, initiating an LE
> connection from a remote peer, and verifying that the advertisement is
> re-activated after the connection is established. This is performed on
> Hatch and Kukui Chromebooks.
> 
> Reviewed-by: Abhishek Pandit-Subedi 
> Signed-off-by: Daniel Winkler 

in the future, please make sure that the originator's Signed-off-by comes
first and the Reviewed-by lines come after it.

> ---
> 
> net/bluetooth/hci_conn.c| 12 ++--
> net/bluetooth/hci_request.c | 21 -
> net/bluetooth/hci_request.h |  2 ++
> 3 files changed, 28 insertions(+), 7 deletions(-)

Patch has been applied to bluetooth-next tree.

Regards

Marcel



Re: [RFC PATCH net-next 3/3] net: dsa: listen for SWITCHDEV_{FDB,DEL}_ADD_TO_DEVICE on foreign bridge neighbors

2020-11-09 Thread Vladimir Oltean
On Mon, Nov 09, 2020 at 02:31:11PM +0200, Vladimir Oltean wrote:
> I need to sit on this for a while. How many DSA drivers do we have that
> don't do SA learning in hardware for CPU-injected packets? ocelot/felix
> and mv88e6xxx? Who else? Because if there aren't that many (or any at
> all except for these two), then I could try to spend some time and see
> how Felix behaves when I send FORWARD frames to it. Then we could go on
> full blast with the other alternative, to force-enable address learning
> from the CPU port, and declare this one as too complicated and not worth
> the effort.

In fact I'm not sure that I should be expecting an answer to this
question. We can evaluate the other alternative in parallel. Would you
be so kind to send some sort of RFC for your TX-side offload_fwd_mark so
that I could test with the hardware I have, and get a better understanding
of the limitations there?


[PATCH][next] net: dsa: fix unintended sign extension on a u16 left shift

2020-11-09 Thread Colin King
From: Colin Ian King 

The left shift of u16 variable high is promoted to the type int and
then sign extended to a 64 bit u64 value.  If the top bit of high is
set then the upper 32 bits of the result end up being set by the
sign extension. Fix this by explicitly casting the value in high to
a u64 before left shifting by 16 places.

Also, remove the initialisation of variable value to 0 at the start
of each loop iteration as the value is never read and hence the
assignment is redundant.

Addresses-Coverity: ("Unintended sign extension")
Fixes: e4b27ebc780f ("net: dsa: Add DSA driver for Hirschmann Hellcreek 
switches")
Signed-off-by: Colin Ian King 
---
 drivers/net/dsa/hirschmann/hellcreek.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/hirschmann/hellcreek.c 
b/drivers/net/dsa/hirschmann/hellcreek.c
index dfa66f7260d6..d42f40c76ba5 100644
--- a/drivers/net/dsa/hirschmann/hellcreek.c
+++ b/drivers/net/dsa/hirschmann/hellcreek.c
@@ -308,7 +308,7 @@ static void hellcreek_get_ethtool_stats(struct dsa_switch 
*ds, int port,
const struct hellcreek_counter *counter = &hellcreek_counter[i];
u8 offset = counter->offset + port * 64;
u16 high, low;
-   u64 value = 0;
+   u64 value;
 
mutex_lock(&hellcreek->reg_lock);
 
@@ -320,7 +320,7 @@ static void hellcreek_get_ethtool_stats(struct dsa_switch 
*ds, int port,
 */
high  = hellcreek_read(hellcreek, HR_CRDH);
low   = hellcreek_read(hellcreek, HR_CRDL);
-   value = (high << 16) | low;
+   value = ((u64)high << 16) | low;
 
hellcreek_port->counter_values[i] += value;
data[i] = hellcreek_port->counter_values[i];
-- 
2.28.0



[PATCH net-next 1/1] net: phy: Allow mdio buses to probe C45 before falling back to C22

2020-11-09 Thread Wong Vee Khee
This patch makes mdiobus_scan() try C45 first, as C45 can access
all devices, before falling back to C22. This makes the function
usable for PHYs that support both C45 and C22.

Reviewed-by: Voon Weifeng 
Reviewed-by: Ong Boon Leong 
Signed-off-by: Wong Vee Khee 
---
 drivers/net/phy/mdio_bus.c | 5 +
 include/linux/phy.h| 1 +
 2 files changed, 6 insertions(+)

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 56094dd6bf26..372d0d088f7e 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -691,6 +691,11 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int 
addr)
if (IS_ERR(phydev))
phydev = get_phy_device(bus, addr, true);
break;
+   case MDIOBUS_C45_C22:
+   phydev = get_phy_device(bus, addr, true);
+   if (IS_ERR(phydev))
+   phydev = get_phy_device(bus, addr, false);
+   break;
}
 
if (IS_ERR(phydev))
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 189bc9881ea6..73d9be2c00f4 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -360,6 +360,7 @@ struct mii_bus {
MDIOBUS_C22,
MDIOBUS_C45,
MDIOBUS_C22_C45,
+   MDIOBUS_C45_C22,
} probe_capabilities;
 
/** @shared_lock: protect access to the shared element */
-- 
2.17.0



Re: [PATCH stable] net: sch_generic: fix the missing new qdisc assignment bug

2020-11-09 Thread Greg KH
On Tue, Nov 03, 2020 at 11:25:38AM +0800, Yunsheng Lin wrote:
> commit 2fb541c862c9 ("net: sch_generic: aviod concurrent reset and enqueue op 
> for lockless qdisc")
> 
> When the above upstream commit is backported to stable kernel,
> one assignment is missing, which causes two problems reported
> by Joakim and Vishwanath, see [1] and [2].
> 
> So add the assignment back to fix it.
> 
> 1. https://www.spinics.net/lists/netdev/msg693916.html
> 2. https://www.spinics.net/lists/netdev/msg695131.html
> 
> Fixes: 749cc0b0c7f3 ("net: sch_generic: aviod concurrent reset and enqueue op 
> for lockless qdisc")
> Signed-off-by: Yunsheng Lin 
> ---
>  net/sched/sch_generic.c | 3 +++
>  1 file changed, 3 insertions(+)

What kernel tree(s) does this need to be backported to?

thanks,

greg k-h


Re: [PATCH][next] net: dsa: fix unintended sign extension on a u16 left shift

2020-11-09 Thread Kurt Kanzenbach
On Mon Nov 09 2020, Colin King wrote:
> From: Colin Ian King 
>
> The left shift of u16 variable high is promoted to the type int and
> then sign extended to a 64 bit u64 value.  If the top bit of high is
> set then the upper 32 bits of the result end up being set by the
> sign extension. Fix this by explicitly casting the value in high to
> a u64 before left shifting by 16 places.
>
> Also, remove the initialisation of variable value to 0 at the start
> of each loop iteration as the value is never read and hence the
> assignment it is redundant.
>
> Addresses-Coverity: ("Unintended sign extension")
> Fixes: e4b27ebc780f ("net: dsa: Add DSA driver for Hirschmann Hellcreek 
> switches")
> Signed-off-by: Colin Ian King 
> ---
>  drivers/net/dsa/hirschmann/hellcreek.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/dsa/hirschmann/hellcreek.c 
> b/drivers/net/dsa/hirschmann/hellcreek.c
> index dfa66f7260d6..d42f40c76ba5 100644
> --- a/drivers/net/dsa/hirschmann/hellcreek.c
> +++ b/drivers/net/dsa/hirschmann/hellcreek.c
> @@ -308,7 +308,7 @@ static void hellcreek_get_ethtool_stats(struct dsa_switch 
> *ds, int port,
>   const struct hellcreek_counter *counter = &hellcreek_counter[i];
>   u8 offset = counter->offset + port * 64;
>   u16 high, low;
> - u64 value = 0;
> + u64 value;
>  
>   mutex_lock(&hellcreek->reg_lock);
>  
> @@ -320,7 +320,7 @@ static void hellcreek_get_ethtool_stats(struct dsa_switch 
> *ds, int port,
>*/
>   high  = hellcreek_read(hellcreek, HR_CRDH);
>   low   = hellcreek_read(hellcreek, HR_CRDL);
> - value = (high << 16) | low;
> + value = ((u64)high << 16) | low;

Looks good to me. Thank you.

Thanks,
Kurt


signature.asc
Description: PGP signature


[PATCH][next] mptcp: fix a dereference of pointer before msk is null checked.

2020-11-09 Thread Colin King
From: Colin Ian King 

Currently the assignment of pointer net from the sock_net(sk) call
is potentially dereferencing a null pointer sk. sk points to the
same location as pointer msk and msk is being null checked after
the sock_net call.  Fix this by calling sock_net after the null
check on pointer msk.

Addresses-Coverity: ("Dereference before null check")
Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout")
Signed-off-by: Colin Ian King 
---
 net/mptcp/pm_netlink.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index ed60538df7b2..e76879ea5a30 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -206,13 +206,15 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
struct mptcp_pm_add_entry *entry = from_timer(entry, timer, add_timer);
struct mptcp_sock *msk = entry->sock;
struct sock *sk = (struct sock *)msk;
-   struct net *net = sock_net(sk);
+   struct net *net;
 
pr_debug("msk=%p", msk);
 
if (!msk)
return;
 
+   net = sock_net(sk);
+
if (inet_sk_state_load(sk) == TCP_CLOSE)
return;
 
-- 
2.28.0



[PATCH] net: tcp: ratelimit warnings in tcp_recvmsg

2020-11-09 Thread menglong8 . dong
From: Menglong Dong 

'before(*seq, TCP_SKB_CB(skb)->seq) == true' means that one or more
skbs were lost somehow. Once this happens, it seems that it will
never recover automatically. As a result, a warning will be printed
and '-EAGAIN' will be returned in non-blocking mode.

In the common case, users call 'poll' on a socket and then receive
skbs with 'recv' in non-blocking mode. In this mode, every skb
arriving on the socket triggers a warning, so a large number of skbs
causes a high rate of kernel log messages.

Besides, WARN is for indicating kernel bugs only and should not be
user-triggerable. Replace it with 'net_warn_ratelimited' here.

Signed-off-by: Menglong Dong 
---
 net/ipv4/tcp.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b2bc3d7fe9e8..5e38dfd03036 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2093,11 +2093,12 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, 
size_t len, int nonblock,
/* Now that we have two receive queues this
 * shouldn't happen.
 */
-   if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
-"TCP recvmsg seq # bug: copied %X, seq %X, 
rcvnxt %X, fl %X\n",
-*seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
-flags))
+   if (unlikely(before(*seq, TCP_SKB_CB(skb)->seq))) {
+   net_warn_ratelimited("TCP recvmsg seq # bug: 
copied %X, seq %X, rcvnxt %X, fl %X\n",
+*seq, 
TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
+flags);
break;
+   }
 
offset = *seq - TCP_SKB_CB(skb)->seq;
if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
@@ -2108,9 +2109,11 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, 
size_t len, int nonblock,
goto found_ok_skb;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto found_fin_ok;
-   WARN(!(flags & MSG_PEEK),
-"TCP recvmsg seq # bug 2: copied %X, seq %X, 
rcvnxt %X, fl %X\n",
-*seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags);
+
+   if (!(flags & MSG_PEEK))
+   net_warn_ratelimited("TCP recvmsg seq # bug 2: 
copied %X, seq %X, rcvnxt %X, fl %X\n",
+*seq, 
TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
+flags);
}
 
/* Well, if we have backlog, try to process it now yet. */
-- 
2.25.1




Re: [PATCH V3] fsl/fman: add missing put_devcie() call in fman_port_probe()

2020-11-09 Thread yukuai (C)

在 2020/11/08 6:09, Jakub Kicinski 写道:

On Sat, 7 Nov 2020 17:09:25 +0800 Yu Kuai wrote:

If of_find_device_by_node() succeeds, fman_port_probe() doesn't have a
corresponding put_device(). Thus add a jump target to fix the exception
handling for this function implementation.

Fixes: 0572054617f3 ("fsl/fman: fix dereference null return value")
Signed-off-by: Yu Kuai 



@@ -1792,20 +1792,20 @@ static int fman_port_probe(struct platform_device 
*of_dev)
if (!fm_node) {
dev_err(port->dev, "%s: of_get_parent() failed\n", __func__);
err = -ENODEV;
-   goto return_err;
+   goto free_port;


And now you no longer put port_node if jumping from here...


Sincerely apologize for that stupid mistake...



Also does the reference to put_device() not have to be released when
this function succeeds?



I'm not sure about that. Since fman_port_driver doesn't define any other
interface, maybe it is reasonable to release it here.


}



@@ -1896,7 +1895,9 @@ static int fman_port_probe(struct platform_device *of_dev)
  
  	return 0;
  
-return_err:

+put_device:
+   put_device(&fm_pdev->dev);
+put_node:
of_node_put(port_node);
  free_port:
kfree(port);


.



BUG: sleeping function called from invalid context in corrupted

2020-11-09 Thread syzbot
Hello,

syzbot found the following issue on:

HEAD commit:bf3e7628 Merge branch 'mtd/fixes' of git://git.kernel.org/..
git tree:   upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=16d76e2a50
kernel config:  https://syzkaller.appspot.com/x/.config?x=e791ddf0875adf65
dashboard link: https://syzkaller.appspot.com/bug?extid=b7aeb9318541a1c709f1
compiler:   clang version 11.0.0 (https://github.com/llvm/llvm-project.git 
ca2dcbd030eadbf0aa9b660efe864ff08af6e18b)
syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=14df611a50

The issue was bisected to:

commit dcd479e10a0510522a5d88b29b8f79ea3467d501
Author: Johannes Berg 
Date:   Fri Oct 9 12:17:11 2020 +

mac80211: always wind down STA state

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=1218ff1450
final oops: https://syzkaller.appspot.com/x/report.txt?x=1118ff1450
console output: https://syzkaller.appspot.com/x/log.txt?x=1618ff1450

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+b7aeb9318541a1c70...@syzkaller.appspotmail.com
Fixes: dcd479e10a05 ("mac80211: always wind down STA state")

BUG: sleeping function called from invalid context at 
net/mac80211/sta_info.c:1962
in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 35, name: kworker/u4:2
4 locks held by kworker/u4:2/35:
 #0: 88802af11138 ((wq_completion)phy4){+.+.}-{0:0}, at: 
process_one_work+0x6f4/0xfc0 kernel/workqueue.c:2245
 #1: c9e0fd80 ((work_completion)(&sdata->work)){+.+.}-{0:0}, at: 
process_one_work+0x733/0xfc0 kernel/workqueue.c:2247
 #2: 88802f27cd00 (&wdev->mtx){+.+.}-{3:3}, at: sdata_lock 
net/mac80211/ieee80211_i.h:1021 [inline]
 #2: 88802f27cd00 (&wdev->mtx){+.+.}-{3:3}, at: 
ieee80211_ibss_work+0x4e/0x1450 net/mac80211/ibss.c:1683


---
This report is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkal...@googlegroups.com.

syzbot will keep track of this issue. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
For information about bisection process see: https://goo.gl/tpsmEJ#bisection
syzbot can test patches for this issue, for details see:
https://goo.gl/tpsmEJ#testing-patches


答复: [PATCH 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter

2020-11-09 Thread zhangqilong
Hi
> 
> On Mon, Nov 9, 2020 at 9:05 AM Zhang Qilong 
> wrote:
> >
> > In many case, we need to check return value of pm_runtime_get_sync,
> > but it brings a trouble to the usage counter processing. Many callers
> > forget to decrease the usage counter when it failed. It has been
> > discussed a lot[0][1]. So we add a function to deal with the usage
> > counter for better coding.
> >
> > [0]https://lkml.org/lkml/2020/6/14/88
> > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/202005200951
> > 48.10995-1-dinghao@zju.edu.cn/
> > Signed-off-by: Zhang Qilong 
> > ---
> >  include/linux/pm_runtime.h | 32 
> >  1 file changed, 32 insertions(+)
> >
> > diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
> > index 4b708f4e8eed..2b0af5b1dffd 100644
> > --- a/include/linux/pm_runtime.h
> > +++ b/include/linux/pm_runtime.h
> > @@ -386,6 +386,38 @@ static inline int pm_runtime_get_sync(struct device
> *dev)
> > return __pm_runtime_resume(dev, RPM_GET_PUT);  }
> >
> > +/**
> > + * gene_pm_runtime_get_sync - Bump up usage counter of a device and
> resume it.
> > + * @dev: Target device.
> 
> The force argument is not documented.

(1) Good catch, I will add it in next version.

> 
> > + *
> > + * Increase runtime PM usage counter of @dev first, and carry out
> > + runtime-resume
> > + * of it synchronously. If __pm_runtime_resume return negative
> > + value(device is in
> > + * error state) or return positive value(the runtime of device is
> > + already active)
> > + * with force is true, it need decrease the usage counter of the
> > + device when
> > + * return.
> > + *
> > + * The possible return values of this function is zero or negative value.
> > + * zero:
> > + *- it means success and the status will store the resume operation
> status
> > + *  if needed, the runtime PM usage counter of @dev remains
> incremented.
> > + * negative:
> > + *- it means failure and the runtime PM usage counter of @dev has
> been
> > + *  decreased.
> > + * positive:
> > + *- it means the runtime of the device is already active before that. 
> > If
> > + *  caller set force to true, we still need to decrease the usage
> counter.
> 
> Why is this needed?

(2) If the caller sets force, it means the caller will return even if the
device was already active (__pm_runtime_resume returned a positive value)
after calling gene_pm_runtime_get_sync, so we still need to decrease the
usage count.

> 
> > + */
> > +static inline int gene_pm_runtime_get_sync(struct device *dev, bool
> > +force)
> 
> The name is not really a good one and note that pm_runtime_get() has the
> same problem as _get_sync() (ie. the usage counter is incremented regardless
> of the return value).
> 

(3) I have not thought of a good name yet; if you have any good ideas, they are welcome.


Thanks, 
Zhang

> > +{
> > +   int ret = 0;
> > +
> > +   ret = __pm_runtime_resume(dev, RPM_GET_PUT);
> > +   if (ret < 0 || (ret > 0 && force))
> > +   pm_runtime_put_noidle(dev);
> > +
> > +   return ret;
> > +}
> > +
> >  /**
> >   * pm_runtime_put - Drop device usage counter and queue up "idle check"
> if 0.
> >   * @dev: Target device.
> > --
> 
> Thanks!


Re: [PATCH v5 net-next 3/3] net/sched: act_frag: add implict packet fragment support.

2020-11-09 Thread Vlad Buslov
On Sun 08 Nov 2020 at 01:30, we...@ucloud.cn wrote:
> From: wenxu 
>
> Currently kernel tc subsystem can do conntrack in act_ct. But when several
> fragment packets go through the act_ct, function tcf_ct_handle_fragments
> will defrag the packets to a big one. But the last action will redirect
> mirred to a device which maybe lead the reassembly big packet over the mtu
> of target device.
>
> This patch add support for a xmit hook to mirred, that gets executed before
> xmiting the packet. Then, when act_ct gets loaded, it configs that hook.
> The frag xmit hook maybe reused by other modules.
>
> Signed-off-by: wenxu 
> ---
> v2: Fix the crash for act_frag module without load
> v3: modify the kconfig describe and put tcf_xmit_hook_is_enabled
> in the tcf_dev_queue_xmit, and xchg atomic for tcf_xmit_hook
> v4: using skb_protocol and fix line length exceeds 80 columns
> v5: no change
>
>  include/net/act_api.h  |  16 +
>  net/sched/Kconfig  |  13 
>  net/sched/Makefile |   1 +
>  net/sched/act_api.c|  51 +++
>  net/sched/act_ct.c |   7 +++
>  net/sched/act_frag.c   | 164 
> +
>  net/sched/act_mirred.c |   2 +-
>  7 files changed, 253 insertions(+), 1 deletion(-)
>  create mode 100644 net/sched/act_frag.c
>
> diff --git a/include/net/act_api.h b/include/net/act_api.h
> index 8721492..403a618 100644
> --- a/include/net/act_api.h
> +++ b/include/net/act_api.h
> @@ -239,6 +239,22 @@ int tcf_action_check_ctrlact(int action, struct 
> tcf_proto *tp,
>struct netlink_ext_ack *newchain);
>  struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action,
>struct tcf_chain *newchain);
> +
> +int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff 
> *skb));
> +int tcf_set_xmit_hook(int (*xmit_hook)(struct sk_buff *skb,
> +int (*xmit)(struct sk_buff *skb)));
> +void tcf_clear_xmit_hook(void);
> +
> +#if IS_ENABLED(CONFIG_NET_ACT_FRAG)
> +int tcf_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff 
> *skb));
> +#else
> +static inline int tcf_frag_xmit_hook(struct sk_buff *skb,
> +  int (*xmit)(struct sk_buff *skb))
> +{
> + return 0;
> +}
> +#endif
> +
>  #endif /* CONFIG_NET_CLS_ACT */
>  
>  static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
> index a3b37d8..9a240c7 100644
> --- a/net/sched/Kconfig
> +++ b/net/sched/Kconfig
> @@ -974,9 +974,22 @@ config NET_ACT_TUNNEL_KEY
> To compile this code as a module, choose M here: the
> module will be called act_tunnel_key.
>  
> +config NET_ACT_FRAG
> + tristate "Packet fragmentation"
> + depends on NET_CLS_ACT
> + help
> + Say Y here to allow fragmenting big packets when outputting
> + with the mirred action.
> +
> +   If unsure, say N.
> +
> +   To compile this code as a module, choose M here: the
> +   module will be called act_frag.
> +

Just wondering, what is the motivation for putting the frag code into
standalone module? It doesn't implement usual act_* interface and is not
user-configurable. To me it looks like functionality that belongs to
act_api. Am I missing something?

>  config NET_ACT_CT
>   tristate "connection tracking tc action"
>   depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE
> + depends on NET_ACT_FRAG
>   help
> Say Y here to allow sending the packets to conntrack module.
>  
> diff --git a/net/sched/Makefile b/net/sched/Makefile
> index 66bbf9a..c146186 100644
> --- a/net/sched/Makefile
> +++ b/net/sched/Makefile
> @@ -29,6 +29,7 @@ obj-$(CONFIG_NET_IFE_SKBMARK)   += act_meta_mark.o
>  obj-$(CONFIG_NET_IFE_SKBPRIO)+= act_meta_skbprio.o
>  obj-$(CONFIG_NET_IFE_SKBTCINDEX) += act_meta_skbtcindex.o
>  obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
> +obj-$(CONFIG_NET_ACT_FRAG)   += act_frag.o
>  obj-$(CONFIG_NET_ACT_CT) += act_ct.o
>  obj-$(CONFIG_NET_ACT_GATE)   += act_gate.o
>  obj-$(CONFIG_NET_SCH_FIFO)   += sch_fifo.o
> diff --git a/net/sched/act_api.c b/net/sched/act_api.c
> index f66417d..e7b501c 100644
> --- a/net/sched/act_api.c
> +++ b/net/sched/act_api.c
> @@ -22,6 +22,57 @@
>  #include 
>  #include 
>  
> +static int (*tcf_xmit_hook)(struct sk_buff *skb,
> + int (*xmit)(struct sk_buff *skb));
> +static DEFINE_STATIC_KEY_FALSE(tcf_xmit_hook_in_use);
> +
> +static void tcf_inc_xmit_hook(void)
> +{
> + static_branch_inc(&tcf_xmit_hook_in_use);
> +}
> +
> +static void tcf_dec_xmit_hook(void)
> +{
> + static_branch_dec(&tcf_xmit_hook_in_use);
> +}
> +
> +static bool tcf_xmit_hook_enabled(void)
> +{
> + return static_branch_unlikely(&tcf_xmit_hook_in_use);
> +}
> +
> +int tcf_set_xmit_hook(int (*xmit_hook)(struct sk_buff *skb,
> +  

[PATCH V2] memory: tegra: add missing put_devcie() call in error path of tegra_emc_probe()

2020-11-09 Thread Yu Kuai
The reference to device obtained with of_find_device_by_node() should
be dropped. Thus add jump target to fix the exception handling for this
function implementation.

Fixes: 73a7f0a90641 ("memory: tegra: Add EMC (external memory controller) 
driver")
Signed-off-by: Yu Kuai 
---
 drivers/memory/tegra/tegra124-emc.c   | 21 +--
 .../net/ethernet/freescale/fman/fman_port.c   |  3 +--
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/memory/tegra/tegra124-emc.c 
b/drivers/memory/tegra/tegra124-emc.c
index 76ace42a688a..7d58a0e0a177 100644
--- a/drivers/memory/tegra/tegra124-emc.c
+++ b/drivers/memory/tegra/tegra124-emc.c
@@ -1207,8 +1207,10 @@ static int tegra_emc_probe(struct platform_device *pdev)
return -ENOENT;
 
emc->mc = platform_get_drvdata(mc);
-   if (!emc->mc)
-   return -EPROBE_DEFER;
+   if (!emc->mc) {
+   err = -EPROBE_DEFER;
+   goto put_device;
+   }
 
ram_code = tegra_read_ram_code();
 
@@ -1217,25 +1219,27 @@ static int tegra_emc_probe(struct platform_device *pdev)
dev_err(&pdev->dev,
"no memory timings for RAM code %u found in DT\n",
ram_code);
-   return -ENOENT;
+   err = -ENOENT;
+   goto put_device;
}
 
err = tegra_emc_load_timings_from_dt(emc, np);
of_node_put(np);
if (err)
-   return err;
+   goto put_device;
 
if (emc->num_timings == 0) {
dev_err(&pdev->dev,
"no memory timings for RAM code %u registered\n",
ram_code);
-   return -ENOENT;
+   err = -ENOENT;
+   goto put_device;
}
 
err = emc_init(emc);
if (err) {
dev_err(&pdev->dev, "EMC initialization failed: %d\n", err);
-   return err;
+   goto put_device;
}
 
platform_set_drvdata(pdev, emc);
@@ -1244,6 +1248,11 @@ static int tegra_emc_probe(struct platform_device *pdev)
emc_debugfs_init(&pdev->dev, emc);
 
return 0;
+
+put_device:
+   put_device(&mc->dev);
+
+   return err;
 };
 
 static struct platform_driver tegra_emc_driver = {
diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c 
b/drivers/net/ethernet/freescale/fman/fman_port.c
index 9790e483241b..fcc59444df17 100644
--- a/drivers/net/ethernet/freescale/fman/fman_port.c
+++ b/drivers/net/ethernet/freescale/fman/fman_port.c
@@ -1792,7 +1792,7 @@ static int fman_port_probe(struct platform_device *of_dev)
if (!fm_node) {
dev_err(port->dev, "%s: of_get_parent() failed\n", __func__);
err = -ENODEV;
-   goto free_port;
+   goto put_node;
}
 
fm_pdev = of_find_device_by_node(fm_node);
@@ -1899,7 +1899,6 @@ static int fman_port_probe(struct platform_device *of_dev)
put_device(&fm_pdev->dev);
 put_node:
of_node_put(port_node);
-free_port:
kfree(port);
return err;
 }
-- 
2.25.4



Re: [PATCH][next] net: dsa: fix unintended sign extension on a u16 left shift

2020-11-09 Thread Kurt Kanzenbach
On Mon Nov 09 2020, Colin King wrote:
> From: Colin Ian King 
>
> The left shift of u16 variable high is promoted to the type int and
> then sign extended to a 64 bit u64 value.  If the top bit of high is
> set then the upper 32 bits of the result end up being set by the
> sign extension. Fix this by explicitly casting the value in high to
> a u64 before left shifting by 16 places.
>
> Also, remove the initialisation of variable value to 0 at the start
> of each loop iteration as the value is never read and hence the
> assignment it is redundant.
>
> Addresses-Coverity: ("Unintended sign extension")
> Fixes: e4b27ebc780f ("net: dsa: Add DSA driver for Hirschmann Hellcreek 
> switches")
> Signed-off-by: Colin Ian King 

Reviewed-by: Kurt Kanzenbach 


signature.asc
Description: PGP signature


Re: [PATCH 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter

2020-11-09 Thread Rafael J. Wysocki
On Mon, Nov 9, 2020 at 2:24 PM zhangqilong  wrote:
>
> Hi
> >
> > On Mon, Nov 9, 2020 at 9:05 AM Zhang Qilong 
> > wrote:
> > >
> > > In many case, we need to check return value of pm_runtime_get_sync,
> > > but it brings a trouble to the usage counter processing. Many callers
> > > forget to decrease the usage counter when it failed. It has been
> > > discussed a lot[0][1]. So we add a function to deal with the usage
> > > counter for better coding.
> > >
> > > [0]https://lkml.org/lkml/2020/6/14/88
> > > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/202005200951
> > > 48.10995-1-dinghao@zju.edu.cn/
> > > Signed-off-by: Zhang Qilong 
> > > ---
> > >  include/linux/pm_runtime.h | 32 
> > >  1 file changed, 32 insertions(+)
> > >
> > > diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
> > > index 4b708f4e8eed..2b0af5b1dffd 100644
> > > --- a/include/linux/pm_runtime.h
> > > +++ b/include/linux/pm_runtime.h
> > > @@ -386,6 +386,38 @@ static inline int pm_runtime_get_sync(struct device
> > *dev)
> > > return __pm_runtime_resume(dev, RPM_GET_PUT);  }
> > >
> > > +/**
> > > + * gene_pm_runtime_get_sync - Bump up usage counter of a device and
> > resume it.
> > > + * @dev: Target device.
> >
> > The force argument is not documented.
>
> (1) Good catch, I will add it in next version.
>
> >
> > > + *
> > > + * Increase runtime PM usage counter of @dev first, and carry out
> > > + runtime-resume
> > > + * of it synchronously. If __pm_runtime_resume return negative
> > > + value(device is in
> > > + * error state) or return positive value(the runtime of device is
> > > + already active)
> > > + * with force is true, it need decrease the usage counter of the
> > > + device when
> > > + * return.
> > > + *
> > > + * The possible return values of this function is zero or negative value.
> > > + * zero:
> > > + *- it means success and the status will store the resume operation
> > status
> > > + *  if needed, the runtime PM usage counter of @dev remains
> > incremented.
> > > + * negative:
> > > + *- it means failure and the runtime PM usage counter of @dev has
> > been
> > > + *  decreased.
> > > + * positive:
> > > + *- it means the runtime of the device is already active before 
> > > that. If
> > > + *  caller set force to true, we still need to decrease the usage
> > counter.
> >
> > Why is this needed?
>
> (2) If caller set force, it means caller will return even the device has 
> already been active
> (__pm_runtime_resume return positive value) after calling 
> gene_pm_runtime_get_sync,
> we still need to decrease the usage count.

But who needs this?

I don't think that it is a good idea to complicate the API this way.


[PATCH ethtool 2/2] ethtool: Improve compatibility between netlink and ioctl interfaces

2020-11-09 Thread Michal Kubecek
From: Ido Schimmel 

With the ioctl interface, when autoneg is enabled, but without
specifying speed, duplex or link modes, the advertised link modes are
set to the supported link modes by the ethtool user space utility.

This does not happen when using the netlink interface. Fix this
incompatibility problem by having ethtool query the supported link modes
from the kernel and advertise all the "real" ones when only "autoneg on"
is specified.

Before:

Settings for eth0:
Supported ports: [ TP ]
Supported link modes:   10baseT/Half 10baseT/Full
100baseT/Half 100baseT/Full
1000baseT/Full
Supported pause frame use: No
Supports auto-negotiation: Yes
Supported FEC modes: Not reported
Advertised link modes:  100baseT/Half 100baseT/Full
Advertised pause frame use: No
Advertised auto-negotiation: Yes
Advertised FEC modes: Not reported
Speed: 1000Mb/s
Duplex: Full
Auto-negotiation: on
Port: Twisted Pair
PHYAD: 0
Transceiver: internal
MDI-X: off (auto)
Supports Wake-on: umbg
Wake-on: d
Current message level: 0x0007 (7)
   drv probe link
Link detected: yes

After:

Settings for eth0:
Supported ports: [ TP ]
Supported link modes:   10baseT/Half 10baseT/Full
100baseT/Half 100baseT/Full
1000baseT/Full
Supported pause frame use: No
Supports auto-negotiation: Yes
Supported FEC modes: Not reported
Advertised link modes:  10baseT/Half 10baseT/Full
100baseT/Half 100baseT/Full
1000baseT/Full
Advertised pause frame use: No
Advertised auto-negotiation: Yes
Advertised FEC modes: Not reported
Speed: 1000Mb/s
Duplex: Full
Auto-negotiation: on
Port: Twisted Pair
PHYAD: 0
Transceiver: internal
MDI-X: on (auto)
Supports Wake-on: umbg
Wake-on: d
Current message level: 0x0007 (7)
   drv probe link
Link detected: yes

Signed-off-by: Ido Schimmel 
Signed-off-by: Michal Kubecek 
---
 netlink/settings.c | 92 ++
 1 file changed, 92 insertions(+)

diff --git a/netlink/settings.c b/netlink/settings.c
index dc9280c114b5..90c28b1bc424 100644
--- a/netlink/settings.c
+++ b/netlink/settings.c
@@ -1115,6 +1115,93 @@ static const struct param_parser sset_params[] = {
  */
 #define SSET_MAX_MSGS 4
 
+static int linkmodes_reply_advert_all_cb(const struct nlmsghdr *nlhdr,
+void *data)
+{
+   const struct nlattr *tb[ETHTOOL_A_LINKMODES_MAX + 1] = {};
+   DECLARE_ATTR_TB_INFO(tb);
+   struct nl_msg_buff *req_msgbuff = data;
+   const struct nlattr *ours_attr;
+   struct nlattr *req_bitset;
+   uint32_t *supported_modes;
+   unsigned int modes_count;
+   unsigned int i;
+   int ret;
+
+   ret = mnl_attr_parse(nlhdr, GENL_HDRLEN, attr_cb, &tb_info);
+   if (ret < 0)
+   return MNL_CB_ERROR;
+   ours_attr = tb[ETHTOOL_A_LINKMODES_OURS];
+   if (!ours_attr)
+   return MNL_CB_ERROR;
+   modes_count = bitset_get_count(tb[ETHTOOL_A_LINKMODES_OURS], &ret);
+   if (ret < 0)
+   return MNL_CB_ERROR;
+   supported_modes = get_compact_bitset_mask(tb[ETHTOOL_A_LINKMODES_OURS]);
+   if (!supported_modes)
+   return MNL_CB_ERROR;
+
+   /* keep only "real" link modes */
+   for (i = 0; i < modes_count; i++)
+   if (!lm_class_match(i, LM_CLASS_REAL))
+   supported_modes[i / 32] &= ~((uint32_t)1 << (i % 32));
+
+   req_bitset = ethnla_nest_start(req_msgbuff, ETHTOOL_A_LINKMODES_OURS);
+   if (!req_bitset)
+   return MNL_CB_ERROR;
+
+   if (ethnla_put_u32(req_msgbuff, ETHTOOL_A_BITSET_SIZE, modes_count) ||
+   ethnla_put(req_msgbuff, ETHTOOL_A_BITSET_VALUE,
+  DIV_ROUND_UP(modes_count, 32) * sizeof(uint32_t),
+  supported_modes) ||
+   ethnla_put(req_msgbuff, ETHTOOL_A_BITSET_MASK,
+  DIV_ROUND_UP(modes_count, 32) * sizeof(uint32_t),
+  supported_modes)) {
+   ethnla_nest_cancel(req_msgbuff, req_bitset);
+   return MNL_CB_ERROR;
+   }
+
+   ethnla_nest_end(req_msgbuff, req_bitset);
+   return MNL_CB_OK;
+}
+
+/* For compatibility reasons with ioctl-based ethtool, when "autoneg on" is
+ * specified without "advertise", "speed" and "duplex", we need to query the
+ * supported link modes from the kernel and advertise all the "real" ones.
+ */
+static int nl_sset_compat_linkmodes(struct nl_context *nlctx,
+ 

[PATCH ethtool 1/2] netlink: do not send messages and process replies in nl_parser()

2020-11-09 Thread Michal Kubecek
When called with group_style = PARSER_GROUP_MSG, nl_parser() not only
parses the command line and composes the messages but also sends them to
kernel and processes the replies. This is inconsistent with other modes and
also impractical as it takes the control over the process from caller where
it belongs.

Modify nl_parser() to pass composed messages back to caller (which is only
nl_sset() at the moment) and let it send requests and process replies. This
will be needed for an upcoming backward compatibility patch which will need
to inspect and possibly modify one of the composed messages.

Signed-off-by: Michal Kubecek 
---
 netlink/cable_test.c |  2 +-
 netlink/channels.c   |  2 +-
 netlink/coalesce.c   |  2 +-
 netlink/eee.c|  2 +-
 netlink/parser.c | 43 ---
 netlink/parser.h |  3 ++-
 netlink/pause.c  |  2 +-
 netlink/rings.c  |  2 +-
 netlink/settings.c   | 35 ++-
 9 files changed, 66 insertions(+), 27 deletions(-)

diff --git a/netlink/cable_test.c b/netlink/cable_test.c
index 8a7145324610..17139f7d297d 100644
--- a/netlink/cable_test.c
+++ b/netlink/cable_test.c
@@ -574,7 +574,7 @@ int nl_cable_test_tdr(struct cmd_context *ctx)
   ctx->devname, 0))
return -EMSGSIZE;
 
-   ret = nl_parser(nlctx, tdr_params, NULL, PARSER_GROUP_NEST);
+   ret = nl_parser(nlctx, tdr_params, NULL, PARSER_GROUP_NEST, NULL);
if (ret < 0)
return ret;
 
diff --git a/netlink/channels.c b/netlink/channels.c
index c6002ceeb121..894c74bcc11a 100644
--- a/netlink/channels.c
+++ b/netlink/channels.c
@@ -126,7 +126,7 @@ int nl_schannels(struct cmd_context *ctx)
   ctx->devname, 0))
return -EMSGSIZE;
 
-   ret = nl_parser(nlctx, schannels_params, NULL, PARSER_GROUP_NONE);
+   ret = nl_parser(nlctx, schannels_params, NULL, PARSER_GROUP_NONE, NULL);
if (ret < 0)
return 1;
 
diff --git a/netlink/coalesce.c b/netlink/coalesce.c
index 07a92d04b7a1..75922a91c2e7 100644
--- a/netlink/coalesce.c
+++ b/netlink/coalesce.c
@@ -254,7 +254,7 @@ int nl_scoalesce(struct cmd_context *ctx)
   ctx->devname, 0))
return -EMSGSIZE;
 
-   ret = nl_parser(nlctx, scoalesce_params, NULL, PARSER_GROUP_NONE);
+   ret = nl_parser(nlctx, scoalesce_params, NULL, PARSER_GROUP_NONE, NULL);
if (ret < 0)
return 1;
 
diff --git a/netlink/eee.c b/netlink/eee.c
index d3135b2094a4..04d8f0bbe3fc 100644
--- a/netlink/eee.c
+++ b/netlink/eee.c
@@ -174,7 +174,7 @@ int nl_seee(struct cmd_context *ctx)
   ctx->devname, 0))
return -EMSGSIZE;
 
-   ret = nl_parser(nlctx, seee_params, NULL, PARSER_GROUP_NONE);
+   ret = nl_parser(nlctx, seee_params, NULL, PARSER_GROUP_NONE, NULL);
if (ret < 0)
return 1;
 
diff --git a/netlink/parser.c b/netlink/parser.c
index 3b25f5d5a88e..c2eae93efb69 100644
--- a/netlink/parser.c
+++ b/netlink/parser.c
@@ -920,7 +920,7 @@ static void __parser_set(uint64_t *map, unsigned int idx)
 }
 
 struct tmp_buff {
-   struct nl_msg_buff  msgbuff;
+   struct nl_msg_buff  *msgbuff;
unsigned intid;
unsigned intorig_len;
struct tmp_buff *next;
@@ -951,7 +951,12 @@ static struct tmp_buff *tmp_buff_find_or_create(struct 
tmp_buff **phead,
if (!new_buff)
return NULL;
new_buff->id = id;
-   msgbuff_init(&new_buff->msgbuff);
+   new_buff->msgbuff = malloc(sizeof(*new_buff->msgbuff));
+   if (!new_buff->msgbuff) {
+   free(new_buff);
+   return NULL;
+   }
+   msgbuff_init(new_buff->msgbuff);
new_buff->next = NULL;
*pbuff = new_buff;
 
@@ -965,7 +970,10 @@ static void tmp_buff_destroy(struct tmp_buff *head)
 
while (buff) {
next = buff->next;
-   msgbuff_done(&buff->msgbuff);
+   if (buff->msgbuff) {
+   msgbuff_done(buff->msgbuff);
+   free(buff->msgbuff);
+   }
free(buff);
buff = next;
}
@@ -980,13 +988,22 @@ static void tmp_buff_destroy(struct tmp_buff *head)
  *   param_parser::offset)
  * @group_style: defines if identifiers in .group represent separate messages,
  *   nested attributes or are not allowed
+ * @msgbuffs: (only used for @group_style = PARSER_GROUP_MSG) array to store
+ *   pointers to composed messages; caller must make sure this
+ *   array is sufficient, i.e. that it has at least as many entries
+ *   as the number of different .group values in params array;
+ *   entries are filled from the start, remaining entries are not
+ *   modified; caller should zero initialize

[PATCH ethtool 0/2] netlink: improve compatibility with ioctl interface

2020-11-09 Thread Michal Kubecek
Restore special behavior of "ethtool -s <dev> autoneg on" if no advertised
modes, speed and duplex are requested: ioctl code enables all link modes
supported by the device. This is most important for network devices which
report no advertised modes when autonegotiation is disabled.

First patch cleans up the parser interface; it allows nl_sset() to inspect
the composed message and append an attribute to it if needed.

Ido Schimmel (1):
  ethtool: Improve compatibility between netlink and ioctl interfaces

Michal Kubecek (1):
  netlink: do not send messages and process replies in nl_parser()

 netlink/cable_test.c |   2 +-
 netlink/channels.c   |   2 +-
 netlink/coalesce.c   |   2 +-
 netlink/eee.c|   2 +-
 netlink/parser.c |  43 ++-
 netlink/parser.h |   3 +-
 netlink/pause.c  |   2 +-
 netlink/rings.c  |   2 +-
 netlink/settings.c   | 127 +--
 9 files changed, 158 insertions(+), 27 deletions(-)

-- 
2.29.2



Re: [PATCH net-next 1/1] net: phy: Allow mdio buses to probe C45 before falling back to C22

2020-11-09 Thread Andrew Lunn
On Mon, Nov 09, 2020 at 08:43:47PM +0800, Wong Vee Khee wrote:
> This patch makes mdiobus_scan() to try on C45 first as C45 can access
> all devices. This allows the function available for the PHY that
> supports for both C45 and C22.
> 
> Reviewed-by: Voon Weifeng 
> Reviewed-by: Ong Boon Leong 
> Signed-off-by: Wong Vee Khee 

Hi

You need to add a user of this.

And i would like to see a more detailed explanation of why it is
needed. The PHY driver is free to do either C45 or C22 transfers.
Why does it care how the device was found?
Plus you can generally access C45 registers via the C45 over C22.  If
the PHY does not allow C45 over C22, then i expect the driver needs to
be aware of whether the PHY can be accessed either way, and it needs to do
different things. And there is no PHY driver that i know of which does
this.

So before this goes any further, we need to see the bigger picture.

   Andrew


re: net: dsa: hellcreek: Add support for hardware timestamping

2020-11-09 Thread Colin Ian King
Hi

Static analysis on linux-next with Coverity has detected a potential
null pointer dereference issue on the following commit:

commit f0d4ba9eff75a79fccb7793f4d9f12303d458603
Author: Kamil Alkhouri 
Date:   Tue Nov 3 08:10:58 2020 +0100

net: dsa: hellcreek: Add support for hardware timestamping

The analysis is as follows:

323  /* Get nanoseconds from ptp packet */
324  type = SKB_PTP_TYPE(skb);

   4. returned_null: ptp_parse_header returns NULL (checked 10 out of 12
times).
   5. var_assigned: Assigning: hdr = NULL return value from
ptp_parse_header.

325  hdr  = ptp_parse_header(skb, type);

   Dereference null return value (NULL_RETURNS)
   6. dereference: Dereferencing a pointer that might be NULL hdr when
calling hellcreek_get_reserved_field.

326  ns   = hellcreek_get_reserved_field(hdr);
327  hellcreek_clear_reserved_field(hdr);

This issue can only occur if the type & PTP_CLASS_PMASK is not one of
PTP_CLASS_IPV4, PTP_CLASS_IPV6 or PTP_CLASS_L2.  I'm not sure if this is
a possibility or not, but I'm assuming that it would be useful to
perform the null check just in case, but I'm not sure how this affects
the hw timestamping code in this function.

Colin




Re: [PATCH v4 4/7] can: replace can_dlc as variable/element for payload length

2020-11-09 Thread Vincent MAILHOL
On Mon. 9 Nov 2020 at 19:26, Oliver Hartkopp wrote:
> diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
> index b2e8df8e4cb0..72671184a7a2 100644
> --- a/include/linux/can/dev.h
> +++ b/include/linux/can/dev.h
> @@ -183,12 +183,12 @@ static inline void can_set_static_ctrlmode(struct 
> net_device *dev,
> /* override MTU which was set by default in can_setup()? */
> if (static_mode & CAN_CTRLMODE_FD)
> dev->mtu = CANFD_MTU;
>  }
>
> -/* get data length from can_dlc with sanitized can_dlc */
> -u8 can_dlc2len(u8 can_dlc);
> +/* get data length from raw data length code (DLC) */

/*
 * convert a given data length code (dlc) of an FD CAN frame into a
 * valid data length of max. 64 bytes.
 */

I missed this point during my previous review: the can_dlc2len() function
is only valid for CAN FD frames. Comments should reflect this fact.

> +u8 can_dlc2len(u8 dlc);

Concerning the name:
 * can_get_cc_len() converts a Classical CAN frame DLC into a data
   length.
 * can_dlc2len() converts an FD CAN frame DLC into a data length.

Just realized that both macro/function do similar things so we could
think of a similar naming as well.
 * Example 1: can_get_cc_len() and can_get_fd_len()
 * Example 2: can_cc_dlc2len() and can_fd_dlc2len()

Or we could simply leave things as they are, this is not a big issue
as long as the comments clearly state which one is for classical
frames and which one is for FD frames.

>
>  /* map the sanitized data length to an appropriate data length code */
>  u8 can_len2dlc(u8 len);

can_len2dlc() might be renamed (e.g. can_get_fd_dlc()) if Example 1
solution is chosen.

>  struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int 
> echo_skb_max,

Yours sincerely,
Vincent Mailhol


Re: [PATCH 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter

2020-11-09 Thread Rafael J. Wysocki
On Mon, Nov 9, 2020 at 9:05 AM Zhang Qilong  wrote:
>
> In many case, we need to check return value of pm_runtime_get_sync, but
> it brings a trouble to the usage counter processing. Many callers forget
> to decrease the usage counter when it failed. It has been discussed a
> lot[0][1]. So we add a function to deal with the usage counter for better
> coding.
>
> [0]https://lkml.org/lkml/2020/6/14/88
> [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/20200520095148.10995-1-dinghao@zju.edu.cn/
> Signed-off-by: Zhang Qilong 
> ---
>  include/linux/pm_runtime.h | 32 
>  1 file changed, 32 insertions(+)
>
> diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
> index 4b708f4e8eed..2b0af5b1dffd 100644
> --- a/include/linux/pm_runtime.h
> +++ b/include/linux/pm_runtime.h
> @@ -386,6 +386,38 @@ static inline int pm_runtime_get_sync(struct device *dev)
> return __pm_runtime_resume(dev, RPM_GET_PUT);
>  }
>
> +/**
> + * gene_pm_runtime_get_sync - Bump up usage counter of a device and resume 
> it.
> + * @dev: Target device.

The force argument is not documented.

> + *
> + * Increase runtime PM usage counter of @dev first, and carry out 
> runtime-resume
> + * of it synchronously. If __pm_runtime_resume return negative value(device 
> is in
> + * error state) or return positive value(the runtime of device is already 
> active)
> + * with force is true, it need decrease the usage counter of the device when
> + * return.
> + *
> + * The possible return values of this function is zero or negative value.
> + * zero:
> + *- it means success and the status will store the resume operation 
> status
> + *  if needed, the runtime PM usage counter of @dev remains incremented.
> + * negative:
> + *- it means failure and the runtime PM usage counter of @dev has been
> + *  decreased.
> + * positive:
> + *- it means the runtime of the device is already active before that. If
> + *  caller set force to true, we still need to decrease the usage 
> counter.

Why is this needed?

> + */
> +static inline int gene_pm_runtime_get_sync(struct device *dev, bool force)

The name is not really a good one and note that pm_runtime_get() has
the same problem as _get_sync() (ie. the usage counter is incremented
regardless of the return value).

> +{
> +   int ret = 0;
> +
> +   ret = __pm_runtime_resume(dev, RPM_GET_PUT);
> +   if (ret < 0 || (ret > 0 && force))
> +   pm_runtime_put_noidle(dev);
> +
> +   return ret;
> +}
> +
>  /**
>   * pm_runtime_put - Drop device usage counter and queue up "idle check" if 0.
>   * @dev: Target device.
> --

Thanks!


答复: [PATCH 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter

2020-11-09 Thread zhangqilong
Hi,

> 
> On Mon, Nov 9, 2020 at 2:24 PM zhangqilong 
> wrote:
> >
> > Hi
> > >
> > > On Mon, Nov 9, 2020 at 9:05 AM Zhang Qilong
> > > 
> > > wrote:
> > > >
> > > > In many case, we need to check return value of
> > > > pm_runtime_get_sync, but it brings a trouble to the usage counter
> > > > processing. Many callers forget to decrease the usage counter when
> > > > it failed. It has been discussed a lot[0][1]. So we add a function
> > > > to deal with the usage counter for better coding.
> > > >
> > > > [0]https://lkml.org/lkml/2020/6/14/88
> > > > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/20200520
> > > > 0951 48.10995-1-dinghao@zju.edu.cn/
> > > > Signed-off-by: Zhang Qilong 
> > > > ---
> > > >  include/linux/pm_runtime.h | 32
> 
> > > >  1 file changed, 32 insertions(+)
> > > >
> > > > diff --git a/include/linux/pm_runtime.h
> > > > b/include/linux/pm_runtime.h index 4b708f4e8eed..2b0af5b1dffd
> > > > 100644
> > > > --- a/include/linux/pm_runtime.h
> > > > +++ b/include/linux/pm_runtime.h
> > > > @@ -386,6 +386,38 @@ static inline int pm_runtime_get_sync(struct
> > > > device
> > > *dev)
> > > > return __pm_runtime_resume(dev, RPM_GET_PUT);  }
> > > >
> > > > +/**
> > > > + * gene_pm_runtime_get_sync - Bump up usage counter of a device
> > > > +and
> > > resume it.
> > > > + * @dev: Target device.
> > >
> > > The force argument is not documented.
> >
> > (1) Good catch, I will add it in next version.
> >
> > >
> > > > + *
> > > > + * Increase runtime PM usage counter of @dev first, and carry out
> > > > + runtime-resume
> > > > + * of it synchronously. If __pm_runtime_resume return negative
> > > > + value(device is in
> > > > + * error state) or return positive value(the runtime of device is
> > > > + already active)
> > > > + * with force is true, it need decrease the usage counter of the
> > > > + device when
> > > > + * return.
> > > > + *
> > > > + * The possible return values of this function is zero or negative 
> > > > value.
> > > > + * zero:
> > > > + *- it means success and the status will store the resume operation
> > > status
> > > > + *  if needed, the runtime PM usage counter of @dev remains
> > > incremented.
> > > > + * negative:
> > > > + *- it means failure and the runtime PM usage counter of @dev has
> > > been
> > > > + *  decreased.
> > > > + * positive:
> > > > + *- it means the runtime of the device is already active before 
> > > > that.
> If
> > > > + *  caller set force to true, we still need to decrease the usage
> > > counter.
> > >
> > > Why is this needed?
> >
> > (2) If caller set force, it means caller will return even the device
> > has already been active (__pm_runtime_resume return positive value)
> > after calling gene_pm_runtime_get_sync, we still need to decrease the
> usage count.
> 
> But who needs this?
> 
> I don't think that it is a good idea to complicate the API this way.

The callers like:
ret = pm_runtime_get_sync(dev);
if (ret) {
...
return (xxx);
}
drivers/spi/spi-img-spfi.c:734 img_spfi_resume() warn: pm_runtime_get_sync() 
also returns 1 on success
drivers/mfd/arizona-core.c:49 arizona_clk32k_enable() warn: 
pm_runtime_get_sync() also returns 1 on success
drivers/usb/dwc3/dwc3-pci.c:212 dwc3_pci_resume_work() warn: 
pm_runtime_get_sync() also returns 1 on success
drivers/input/keyboard/omap4-keypad.c:279 omap4_keypad_probe() warn: 
pm_runtime_get_sync() also returns 1 on success
drivers/gpu/drm/vc4/vc4_dsi.c:839 vc4_dsi_encoder_enable() warn: 
pm_runtime_get_sync() also returns 1 on success
drivers/gpu/drm/i915/selftests/mock_gem_device.c:157 mock_gem_device() warn: 
'pm_runtime_get_sync(&pdev->dev)' returns positive and negative
drivers/watchdog/rti_wdt.c:230 rti_wdt_probe() warn: pm_runtime_get_sync() also 
returns 1 on success
drivers/media/platform/exynos4-is/mipi-csis.c:513 s5pcsis_s_stream() warn: 
pm_runtime_get_sync() also returns 1 on success
drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c:89 mtk_vcodec_dec_pw_on() 
warn: pm_runtime_get_sync() also returns 1 on success
drivers/media/platform/ti-vpe/cal.c:794 cal_probe() warn: pm_runtime_get_sync() 
also returns 1 on success
drivers/media/platform/ti-vpe/vpe.c:2478 vpe_runtime_get() warn: 
pm_runtime_get_sync() also returns 1 on success
drivers/media/i2c/smiapp/smiapp-core.c:1529 smiapp_pm_get_init() warn: 
pm_runtime_get_sync() also returns 1 on success
...
they need it to simplify the function.

If we only want to simplify like
ret = pm_runtime_get_sync(dev);
if (ret < 0) {
...
return (xxx);
}
The parameter force could be removed.

Thanks,
Zhang


Re: [PATCH v2] Bluetooth: Move force_bredr_smp debugfs into hci_debugfs_create_bredr

2020-11-09 Thread Marcel Holtmann
Hi Claire,

> Avoid multiple attempts to create the debugfs entry, force_bredr_smp,
> by moving it from the SMP registration to the BR/EDR controller init
> section. hci_debugfs_create_bredr is only called when HCI_SETUP and
> HCI_CONFIG is not set.
> 
> Signed-off-by: Claire Chang 
> ---
> v2: correct a typo in commit message
> 
> net/bluetooth/hci_debugfs.c | 50 +
> net/bluetooth/smp.c | 44 ++--
> net/bluetooth/smp.h |  2 ++
> 3 files changed, 54 insertions(+), 42 deletions(-)

patch has been applied to bluetooth-next tree.

Regards

Marcel



Re: [RFC PATCH net-next 3/3] net: dsa: listen for SWITCHDEV_{FDB,DEL}_ADD_TO_DEVICE on foreign bridge neighbors

2020-11-09 Thread Tobias Waldekranz
On Mon Nov 9, 2020 at 3:38 PM CET, Vladimir Oltean wrote:
> On Mon, Nov 09, 2020 at 02:31:11PM +0200, Vladimir Oltean wrote:
> > I need to sit on this for a while. How many DSA drivers do we have that
> > don't do SA learning in hardware for CPU-injected packets? ocelot/felix
> > and mv88e6xxx? Who else? Because if there aren't that many (or any at
> > all except for these two), then I could try to spend some time and see
> > how Felix behaves when I send FORWARD frames to it. Then we could go on
> > full blast with the other alternative, to force-enable address learning
> > from the CPU port, and declare this one as too complicated and not worth
> > the effort.
>
> In fact I'm not sure that I should be expecting an answer to this
> question. We can evaluate the other alternative in parallel. Would you
> be so kind to send some sort of RFC for your TX-side offload_fwd_mark so
> that I could test with the hardware I have, and get a better
> understanding
> of the limitations there?

That is the plan. I have some stuff I need to get done before
though. The current implementation is on a 4.19 kernel, so it's going
to take some time to rebase it.


re: net: dsa: hellcreek: Add support for hardware timestamping

2020-11-09 Thread Kurt Kanzenbach
Hi Colin,

On Mon Nov 09 2020, Colin Ian King wrote:
> Hi
>
> Static analysis on linux-next with Coverity has detected a potential
> null pointer dereference issue on the following commit:
>
> commit f0d4ba9eff75a79fccb7793f4d9f12303d458603
> Author: Kamil Alkhouri 
> Date:   Tue Nov 3 08:10:58 2020 +0100
>
> net: dsa: hellcreek: Add support for hardware timestamping
>
> The analysis is as follows:
>
> 323/* Get nanoseconds from ptp packet */
> 324type = SKB_PTP_TYPE(skb);
>
>4. returned_null: ptp_parse_header returns NULL (checked 10 out of 12
> times).
>5. var_assigned: Assigning: hdr = NULL return value from
> ptp_parse_header.
>
> 325hdr  = ptp_parse_header(skb, type);
>
>Dereference null return value (NULL_RETURNS)
>6. dereference: Dereferencing a pointer that might be NULL hdr when
> calling hellcreek_get_reserved_field.
>
> 326ns   = hellcreek_get_reserved_field(hdr);
> 327hellcreek_clear_reserved_field(hdr);
>
> This issue can only occur if the type & PTP_CLASS_PMASK is not one of
> PTP_CLASS_IPV4, PTP_CLASS_IPV6 or PTP_CLASS_L2.  I'm not sure if this is
> a possibility or not, but I'm assuming that it would be useful to
> perform the null check just in case, but I'm not sure how this affects
> the hw timestamping code in this function.

I don't see how the null pointer dereference could happen. That's the
Rx path you showed above.

The counterpart code is:

hellcreek_port_rxtstamp:

/* Make sure the message is a PTP message that needs to be timestamped
 * and the interaction with the HW timestamping is enabled. If not, stop
 * here
 */
hdr = hellcreek_should_tstamp(hellcreek, port, skb, type);
if (!hdr)
return false;

SKB_PTP_TYPE(skb) = type;

Here the type is stored and hellcreek_should_tstamp() also calls
ptp_parse_header() internally. Only when ptp_parse_header() didn't
return NULL the first time the timestamping continues. It should be
safe.

Also the error handling would be interesting at that point. What should
happen if the header is null then? Returning an invalid timestamp?
Ignore it?

Hm. I think we have to make sure that it is a valid ptp packet before
reaching this code and that's what we've implemented. So, I guess it's
OK.

Thanks,
Kurt


signature.asc
Description: PGP signature


[MPTCP][PATCH net 1/2] mptcp: fix static checker warnings in mptcp_pm_add_timer

2020-11-09 Thread Geliang Tang
Fix the following Smatch complaint:

 net/mptcp/pm_netlink.c:213 mptcp_pm_add_timer()
 warn: variable dereferenced before check 'msk' (see line 208)

 net/mptcp/pm_netlink.c
207  struct mptcp_sock *msk = entry->sock;
208  struct sock *sk = (struct sock *)msk;
209  struct net *net = sock_net(sk);
   ^^
 "msk" dereferenced here.

210
211  pr_debug("msk=%p", msk);
212
213  if (!msk)

 Too late.

214  return;
215

Fixes: 93f323b9 ("mptcp: add a new sysctl add_addr_timeout")
Reported-by: Dan Carpenter 
Signed-off-by: Geliang Tang 
Reviewed-by: Dan Carpenter 
---
 net/mptcp/pm_netlink.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 6180a8b39a3f..03f2c28f11f5 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -206,7 +206,6 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
struct mptcp_pm_add_entry *entry = from_timer(entry, timer, add_timer);
struct mptcp_sock *msk = entry->sock;
struct sock *sk = (struct sock *)msk;
-   struct net *net = sock_net(sk);
 
pr_debug("msk=%p", msk);
 
@@ -235,7 +234,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
 
if (entry->retrans_times < ADD_ADDR_RETRANS_MAX)
sk_reset_timer(sk, timer,
-  jiffies + mptcp_get_add_addr_timeout(net));
+  jiffies + 
mptcp_get_add_addr_timeout(sock_net(sk)));
 
spin_unlock_bh(&msk->pm.lock);
 
-- 
2.26.2



Re: [PATCH v2 net] ethtool: netlink: add missing netdev_features_change() call

2020-11-09 Thread Michal Kubecek
On Sun, Nov 08, 2020 at 12:46:15AM +, Alexander Lobakin wrote:
> After updating userspace Ethtool from 5.7 to 5.9, I noticed that
> NETDEV_FEAT_CHANGE is no more raised when changing netdev features
> through Ethtool.
> That's because the old Ethtool ioctl interface always calls
> netdev_features_change() at the end of user request processing to
> inform the kernel that our netdevice has some features changed, but
> the new Netlink interface does not. Instead, it just notifies itself
> with ETHTOOL_MSG_FEATURES_NTF.
> Replace this ethtool_notify() call with netdev_features_change(), so
> the kernel will be aware of any features changes, just like in case
> with the ioctl interface. This does not omit Ethtool notifications,
> as Ethtool itself listens to NETDEV_FEAT_CHANGE and drops
> ETHTOOL_MSG_FEATURES_NTF on it
> (net/ethtool/netlink.c:ethnl_netdev_event()).
> 
> From v1 [1]:
> - dropped extra new line as advised by Jakub;
> - no functional changes.
> 
> [1] 
> https://lore.kernel.org/netdev/alzxq2o5uutvhcfngoiggj8vj3kgo5yiwanqjh0...@cp3-web-009.plabs.ch
> 
> Fixes: 0980bfcd6954 ("ethtool: set netdev features with FEATURES_SET request")
> Signed-off-by: Alexander Lobakin 

Reviewed-by: Michal Kubecek 

> ---
>  net/ethtool/features.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/net/ethtool/features.c b/net/ethtool/features.c
> index 8ee4cdbd6b82..1c9f4df273bd 100644
> --- a/net/ethtool/features.c
> +++ b/net/ethtool/features.c
> @@ -280,7 +280,7 @@ int ethnl_set_features(struct sk_buff *skb, struct 
> genl_info *info)
> active_diff_mask, compact);
>   }
>   if (mod)
> - ethtool_notify(dev, ETHTOOL_MSG_FEATURES_NTF, NULL);
> + netdev_features_change(dev);
>  
>  out_rtnl:
>   rtnl_unlock();
> -- 
> 2.29.2
> 
> 


signature.asc
Description: PGP signature


[MPTCP][PATCH net 2/2] mptcp: cleanup for mptcp_pm_alloc_anno_list

2020-11-09 Thread Geliang Tang
This patch adds a NULL pointer check to mptcp_pm_alloc_anno_list, and
avoids similar static checker warnings in mptcp_pm_add_timer.

Signed-off-by: Geliang Tang 
Reviewed-by: Dan Carpenter 
---
 net/mptcp/pm_netlink.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 03f2c28f11f5..dfc1bed4a55f 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -266,7 +266,9 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
 {
struct mptcp_pm_add_entry *add_entry = NULL;
struct sock *sk = (struct sock *)msk;
-   struct net *net = sock_net(sk);
+
+   if (!msk)
+   return false;
 
if (lookup_anno_list_by_saddr(msk, &entry->addr))
return false;
@@ -283,7 +285,7 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
 
timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0);
sk_reset_timer(sk, &add_entry->add_timer,
-  jiffies + mptcp_get_add_addr_timeout(net));
+  jiffies + mptcp_get_add_addr_timeout(sock_net(sk)));
 
return true;
 }
-- 
2.26.2



[MPTCP][PATCH net 0/2] fix static checker warnings in

2020-11-09 Thread Geliang Tang
This patchset fixes static checker warnings in mptcp_pm_add_timer and
mptcp_pm_alloc_anno_list.

Geliang Tang (2):
  mptcp: fix static checker warnings in mptcp_pm_add_timer
  mptcp: cleanup for mptcp_pm_alloc_anno_list

 net/mptcp/pm_netlink.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

-- 
2.26.2



[PATCH net] tipc: fix memory leak in tipc_topsrv_start()

2020-11-09 Thread Wang Hai
kmemleak reports a memory leak as follows:

unreferenced object 0x88810a596800 (size 512):
  comm "ip", pid 21558, jiffies 4297568990 (age 112.120s)
  hex dump (first 32 bytes):
00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00  .N..
ff ff ff ff ff ff ff ff 00 83 60 b0 ff ff ff ff  ..`.
  backtrace:
[<22bbe21f>] tipc_topsrv_init_net+0x1f3/0xa70
[] ops_init+0xa8/0x3c0
[<138af6f2>] setup_net+0x2de/0x7e0
[<8c6807a3>] copy_net_ns+0x27d/0x530
[<6b21adbd>] create_new_namespaces+0x382/0xa30
[] unshare_nsproxy_namespaces+0xa1/0x1d0
[] ksys_unshare+0x39c/0x780
[<09ba3b19>] __x64_sys_unshare+0x2d/0x40
[<614ad866>] do_syscall_64+0x56/0xa0
[] entry_SYSCALL_64_after_hwframe+0x44/0xa9

'srv' is allocated in tipc_topsrv_start() but is not freed before
returning from the error handling paths. We need to free it.

Fixes: 5c45ab24ac77 ("tipc: make struct tipc_server private for server.c")
Reported-by: Hulk Robot 
Signed-off-by: Wang Hai 
---
 net/tipc/topsrv.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 5f6f86051c83..13f3143609f9 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -664,12 +664,18 @@ static int tipc_topsrv_start(struct net *net)
 
ret = tipc_topsrv_work_start(srv);
if (ret < 0)
-   return ret;
+   goto err_start;
 
ret = tipc_topsrv_create_listener(srv);
if (ret < 0)
-   tipc_topsrv_work_stop(srv);
+   goto err_create;
 
+   return 0;
+
+err_create:
+   tipc_topsrv_work_stop(srv);
+err_start:
+   kfree(srv);
return ret;
 }
 
-- 
2.17.1



Re: [PATCH 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter

2020-11-09 Thread Rafael J. Wysocki
On Mon, Nov 9, 2020 at 2:46 PM zhangqilong  wrote:
>
> Hi,
>
> >
> > On Mon, Nov 9, 2020 at 2:24 PM zhangqilong 
> > wrote:
> > >
> > > Hi
> > > >
> > > > On Mon, Nov 9, 2020 at 9:05 AM Zhang Qilong
> > > > 
> > > > wrote:
> > > > >
> > > > > In many case, we need to check return value of
> > > > > pm_runtime_get_sync, but it brings a trouble to the usage counter
> > > > > processing. Many callers forget to decrease the usage counter when
> > > > > it failed. It has been discussed a lot[0][1]. So we add a function
> > > > > to deal with the usage counter for better coding.
> > > > >
> > > > > [0]https://lkml.org/lkml/2020/6/14/88
> > > > > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/20200520
> > > > > 0951 48.10995-1-dinghao@zju.edu.cn/
> > > > > Signed-off-by: Zhang Qilong 
> > > > > ---
> > > > >  include/linux/pm_runtime.h | 32
> > 
> > > > >  1 file changed, 32 insertions(+)
> > > > >
> > > > > diff --git a/include/linux/pm_runtime.h
> > > > > b/include/linux/pm_runtime.h index 4b708f4e8eed..2b0af5b1dffd
> > > > > 100644
> > > > > --- a/include/linux/pm_runtime.h
> > > > > +++ b/include/linux/pm_runtime.h
> > > > > @@ -386,6 +386,38 @@ static inline int pm_runtime_get_sync(struct
> > > > > device
> > > > *dev)
> > > > > return __pm_runtime_resume(dev, RPM_GET_PUT);  }
> > > > >
> > > > > +/**
> > > > > + * gene_pm_runtime_get_sync - Bump up usage counter of a device
> > > > > +and
> > > > resume it.
> > > > > + * @dev: Target device.
> > > >
> > > > The force argument is not documented.
> > >
> > > (1) Good catch, I will add it in next version.
> > >
> > > >
> > > > > + *
> > > > > + * Increase runtime PM usage counter of @dev first, and carry out
> > > > > + runtime-resume
> > > > > + * of it synchronously. If __pm_runtime_resume return negative
> > > > > + value(device is in
> > > > > + * error state) or return positive value(the runtime of device is
> > > > > + already active)
> > > > > + * with force is true, it need decrease the usage counter of the
> > > > > + device when
> > > > > + * return.
> > > > > + *
> > > > > + * The possible return values of this function is zero or negative 
> > > > > value.
> > > > > + * zero:
> > > > > + *- it means success and the status will store the resume 
> > > > > operation
> > > > status
> > > > > + *  if needed, the runtime PM usage counter of @dev remains
> > > > incremented.
> > > > > + * negative:
> > > > > + *- it means failure and the runtime PM usage counter of @dev has
> > > > been
> > > > > + *  decreased.
> > > > > + * positive:
> > > > > + *- it means the runtime of the device is already active before 
> > > > > that.
> > If
> > > > > + *  caller set force to true, we still need to decrease the usage
> > > > counter.
> > > >
> > > > Why is this needed?
> > >
> > > (2) If caller set force, it means caller will return even the device
> > > has already been active (__pm_runtime_resume return positive value)
> > > after calling gene_pm_runtime_get_sync, we still need to decrease the
> > usage count.
> >
> > But who needs this?
> >
> > I don't think that it is a good idea to complicate the API this way.
>
> The callers like:
> ret = pm_runtime_get_sync(dev);
> if (ret) {
> ...
> return (xxx);
> }

Which isn't correct really, is it?

If ret is greater than 0, the error should not be returned in the
first place, so you may want the new wrapper to return zero in that
case instead.

> drivers/spi/spi-img-spfi.c:734 img_spfi_resume() warn: pm_runtime_get_sync() 
> also returns 1 on success
> drivers/mfd/arizona-core.c:49 arizona_clk32k_enable() warn: 
> pm_runtime_get_sync() also returns 1 on success
> drivers/usb/dwc3/dwc3-pci.c:212 dwc3_pci_resume_work() warn: 
> pm_runtime_get_sync() also returns 1 on success
> drivers/input/keyboard/omap4-keypad.c:279 omap4_keypad_probe() warn: 
> pm_runtime_get_sync() also returns 1 on success
> drivers/gpu/drm/vc4/vc4_dsi.c:839 vc4_dsi_encoder_enable() warn: 
> pm_runtime_get_sync() also returns 1 on success
> drivers/gpu/drm/i915/selftests/mock_gem_device.c:157 mock_gem_device() warn: 
> 'pm_runtime_get_sync(&pdev->dev)' returns positive and negative
> drivers/watchdog/rti_wdt.c:230 rti_wdt_probe() warn: pm_runtime_get_sync() 
> also returns 1 on success
> drivers/media/platform/exynos4-is/mipi-csis.c:513 s5pcsis_s_stream() warn: 
> pm_runtime_get_sync() also returns 1 on success
> drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c:89 
> mtk_vcodec_dec_pw_on() warn: pm_runtime_get_sync() also returns 1 on success
> drivers/media/platform/ti-vpe/cal.c:794 cal_probe() warn: 
> pm_runtime_get_sync() also returns 1 on success
> drivers/media/platform/ti-vpe/vpe.c:2478 vpe_runtime_get() warn: 
> pm_runtime_get_sync() also returns 1 on success
> drivers/media/i2c/smiapp/smiapp-core.c:1529 smiapp_pm_get_init() warn: 
> pm_runtime_get_sync() also returns 1 on success
> ...
> they need it to simplify the function.
>
> If we only

Re: net: dsa: hellcreek: Add support for hardware timestamping

2020-11-09 Thread Colin Ian King
On 09/11/2020 13:59, Kurt Kanzenbach wrote:
> Hi Colin,
> 
> On Mon Nov 09 2020, Colin Ian King wrote:
>> Hi
>>
>> Static analysis on linux-next with Coverity has detected a potential
>> null pointer dereference issue on the following commit:
>>
>> commit f0d4ba9eff75a79fccb7793f4d9f12303d458603
>> Author: Kamil Alkhouri 
>> Date:   Tue Nov 3 08:10:58 2020 +0100
>>
>> net: dsa: hellcreek: Add support for hardware timestamping
>>
>> The analysis is as follows:
>>
>> 323/* Get nanoseconds from ptp packet */
>> 324type = SKB_PTP_TYPE(skb);
>>
>>4. returned_null: ptp_parse_header returns NULL (checked 10 out of 12
>> times).
>>5. var_assigned: Assigning: hdr = NULL return value from
>> ptp_parse_header.
>>
>> 325hdr  = ptp_parse_header(skb, type);
>>
>>Dereference null return value (NULL_RETURNS)
>>6. dereference: Dereferencing a pointer that might be NULL hdr when
>> calling hellcreek_get_reserved_field.
>>
>> 326ns   = hellcreek_get_reserved_field(hdr);
>> 327hellcreek_clear_reserved_field(hdr);
>>
>> This issue can only occur if the type & PTP_CLASS_PMASK is not one of
>> PTP_CLASS_IPV4, PTP_CLASS_IPV6 or PTP_CLASS_L2.  I'm not sure if this is
>> a possibility or not, but I'm assuming that it would be useful to
>> perform the null check just in case, but I'm not sure how this affects
>> the hw timestamping code in this function.
> 
> I don't see how the null pointer dereference could happen. That's the
> Rx path you showed above.
> 
> The counter part code is:
> 
> hellcreek_port_rxtstamp:
> 
>   /* Make sure the message is a PTP message that needs to be timestamped
>* and the interaction with the HW timestamping is enabled. If not, stop
>* here
>*/
>   hdr = hellcreek_should_tstamp(hellcreek, port, skb, type);
>   if (!hdr)
>   return false;
> 
>   SKB_PTP_TYPE(skb) = type;
> 
> Here the type is stored and hellcreek_should_tstamp() also calls
> ptp_parse_header() internally. Only when ptp_parse_header() didn't
> return NULL the first time the timestamping continues. It should be
> safe.
> 
> Also the error handling would be interesting at that point. What should
> happen if the header is null then? Returning an invalid timestamp?
> Ignore it?
> 
> Hm. I think we have to make sure that it is a valid ptp packet before
> reaching this code and that's what we've implemented. So, I guess it's
> OK.

OK - thanks, I'll mark this as a false positive.

> 
> Thanks,
> Kurt
> 



答复: [PATCH 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter

2020-11-09 Thread zhangqilong
> On Mon, Nov 9, 2020 at 2:46 PM zhangqilong 
> wrote:
> >
> > Hi,
> >
> > >
> > > On Mon, Nov 9, 2020 at 2:24 PM zhangqilong 
> > > wrote:
> > > >
> > > > Hi
> > > > >
> > > > > On Mon, Nov 9, 2020 at 9:05 AM Zhang Qilong
> > > > > 
> > > > > wrote:
> > > > > >
> > > > > > In many case, we need to check return value of
> > > > > > pm_runtime_get_sync, but it brings a trouble to the usage
> > > > > > counter processing. Many callers forget to decrease the usage
> > > > > > counter when it failed. It has been discussed a lot[0][1]. So
> > > > > > we add a function to deal with the usage counter for better coding.
> > > > > >
> > > > > > [0]https://lkml.org/lkml/2020/6/14/88
> > > > > > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/2020
> > > > > > 0520
> > > > > > 0951 48.10995-1-dinghao@zju.edu.cn/
> > > > > > Signed-off-by: Zhang Qilong 
> > > > > > ---
> > > > > >  include/linux/pm_runtime.h | 32
> > > 
> > > > > >  1 file changed, 32 insertions(+)
> > > > > >
> > > > > > diff --git a/include/linux/pm_runtime.h
> > > > > > b/include/linux/pm_runtime.h index 4b708f4e8eed..2b0af5b1dffd
> > > > > > 100644
> > > > > > --- a/include/linux/pm_runtime.h
> > > > > > +++ b/include/linux/pm_runtime.h
> > > > > > @@ -386,6 +386,38 @@ static inline int
> > > > > > pm_runtime_get_sync(struct device
> > > > > *dev)
> > > > > > return __pm_runtime_resume(dev, RPM_GET_PUT);  }
> > > > > >
> > > > > > +/**
> > > > > > + * gene_pm_runtime_get_sync - Bump up usage counter of a
> > > > > > +device and
> > > > > resume it.
> > > > > > + * @dev: Target device.
> > > > >
> > > > > The force argument is not documented.
> > > >
> > > > (1) Good catch, I will add it in next version.
> > > >
> > > > >
> > > > > > + *
> > > > > > + * Increase runtime PM usage counter of @dev first, and carry
> > > > > > + out runtime-resume
> > > > > > + * of it synchronously. If __pm_runtime_resume return
> > > > > > + negative value(device is in
> > > > > > + * error state) or return positive value(the runtime of
> > > > > > + device is already active)
> > > > > > + * with force is true, it need decrease the usage counter of
> > > > > > + the device when
> > > > > > + * return.
> > > > > > + *
> > > > > > + * The possible return values of this function is zero or negative 
> > > > > > value.
> > > > > > + * zero:
> > > > > > + *- it means success and the status will store the resume
> operation
> > > > > status
> > > > > > + *  if needed, the runtime PM usage counter of @dev remains
> > > > > incremented.
> > > > > > + * negative:
> > > > > > + *- it means failure and the runtime PM usage counter of @dev
> has
> > > > > been
> > > > > > + *  decreased.
> > > > > > + * positive:
> > > > > > + *- it means the runtime of the device is already active before
> that.
> > > If
> > > > > > + *  caller set force to true, we still need to decrease the 
> > > > > > usage
> > > > > counter.
> > > > >
> > > > > Why is this needed?
> > > >
> > > > (2) If caller set force, it means caller will return even the
> > > > device has already been active (__pm_runtime_resume return
> > > > positive value) after calling gene_pm_runtime_get_sync, we still
> > > > need to decrease the
> > > usage count.
> > >
> > > But who needs this?
> > >
> > > I don't think that it is a good idea to complicate the API this way.
> >
> > The callers like:
> > ret = pm_runtime_get_sync(dev);
> > if (ret) {
> > ...
> > return (xxx);
> > }
> 
> Which isn't correct really, is it?
> 
> If ret is greater than 0, the error should not be returned in the first 
> place, so
> you may want the new wrapper to return zero in that case instead.

I get your idea.

> 
> > drivers/spi/spi-img-spfi.c:734 img_spfi_resume() warn:
> > pm_runtime_get_sync() also returns 1 on success
> > drivers/mfd/arizona-core.c:49 arizona_clk32k_enable() warn:
> > pm_runtime_get_sync() also returns 1 on success
> > drivers/usb/dwc3/dwc3-pci.c:212 dwc3_pci_resume_work() warn:
> > pm_runtime_get_sync() also returns 1 on success
> > drivers/input/keyboard/omap4-keypad.c:279 omap4_keypad_probe() warn:
> > pm_runtime_get_sync() also returns 1 on success
> > drivers/gpu/drm/vc4/vc4_dsi.c:839 vc4_dsi_encoder_enable() warn:
> > pm_runtime_get_sync() also returns 1 on success
> > drivers/gpu/drm/i915/selftests/mock_gem_device.c:157 mock_gem_device()
> > warn: 'pm_runtime_get_sync(&pdev->dev)' returns positive and negative
> > drivers/watchdog/rti_wdt.c:230 rti_wdt_probe() warn:
> > pm_runtime_get_sync() also returns 1 on success
> > drivers/media/platform/exynos4-is/mipi-csis.c:513 s5pcsis_s_stream()
> > warn: pm_runtime_get_sync() also returns 1 on success
> > drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c:89
> > mtk_vcodec_dec_pw_on() warn: pm_runtime_get_sync() also returns 1 on
> > success
> > drivers/media/platform/ti-vpe/cal.c:794 cal_probe() warn:
> > pm_runtime_get_sync() also returns 1 on success
> > drivers/media/platfor

RE: [EXTERNAL] Re: [PATCH bpf-next v2] Update perf ring buffer to prevent corruption

2020-11-09 Thread Kevin Sheldrake



> -Original Message-
> From: Peter Zijlstra 
> Sent: 09 November 2020 11:29
> To: Alexei Starovoitov 
> Cc: Kevin Sheldrake ; Ingo Molnar
> ; Daniel Borkmann ; Network
> Development ; b...@vger.kernel.org; Andrii
> Nakryiko ; KP Singh 
> Subject: [EXTERNAL] Re: [PATCH bpf-next v2] Update perf ring buffer to
> prevent corruption
> 
> On Thu, Nov 05, 2020 at 08:19:47PM -0800, Alexei Starovoitov wrote:
> > On Thu, Nov 5, 2020 at 7:18 AM Kevin Sheldrake
> >  wrote:
> > >
> > > Resent due to some failure at my end.  Apologies if it arrives twice.
> > >
> > > From 63e34d4106b4dd767f9bfce951f8a35f14b52072 Mon Sep 17 00:00:00
> 2001
> > > From: Kevin Sheldrake 
> > > Date: Thu, 5 Nov 2020 12:18:53 +
> > > Subject: [PATCH] Update perf ring buffer to prevent corruption from
> > >  bpf_perf_output_event()
> > >
> > > The bpf_perf_output_event() helper takes a sample size parameter of
> u64, but
> > > the underlying perf ring buffer uses a u16 internally. This 64KB maximum
> size
> > > has to also accommodate a variable sized header. Failure to observe this
> > > restriction can result in corruption of the perf ring buffer as samples
> > > overlap.
> > >
> > > Track the sample size and return -E2BIG if too big to fit into the u16
> > > size parameter.
> > >
> > > Signed-off-by: Kevin Sheldrake 
> >
> > The fix makes sense to me.
> > Peter, Ingo,
> > should I take it through the bpf tree or you want to route via tip?
> 
> What are you doing to trigger this? The Changelog is devoid of much
> useful information?

Hello

I triggered the corruption by sending samples larger than 64KB-24 bytes
to a perf ring buffer from eBPF using bpf_perf_event_output().  The u16
that holds the size in the struct perf_event_header is overflowed and
the distance between adjacent samples in the perf ring buffer is set
by this overflowed value; hence if samples of 64KB are sent, adjacent
samples are placed 24 bytes apart in the ring buffer, with the later ones
overwriting parts of the earlier ones.  If samples aren't read as quickly
as they are received, then they are corrupted by the time they are read.

Attempts to fix this in the eBPF verifier failed as the actual sample is
constructed from a variable sized header in addition to the raw data
supplied from eBPF.  The sample is constructed in perf_prepare_sample(),
outside of the eBPF engine.

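My proposed fix is to check that the constructed size fits into the u16
size field, and to return -E2BIG if it does not.  A proof of concept is
available at:
https://github.com/microsoft/OMS-Auditd-Plugin/tree/MSTIC-Research/ebpf_perf_output_poc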

Thanks

Kevin Sheldrake



Re: [PATCH] page_frag: Recover from memory pressure

2020-11-09 Thread Matthew Wilcox
On Thu, Nov 05, 2020 at 02:02:24PM +, Matthew Wilcox wrote:
> On Thu, Nov 05, 2020 at 02:21:25PM +0100, Eric Dumazet wrote:
> > On 11/5/20 5:21 AM, Matthew Wilcox (Oracle) wrote:
> > > When the machine is under extreme memory pressure, the page_frag allocator
> > > signals this to the networking stack by marking allocations with the
> > > 'pfmemalloc' flag, which causes non-essential packets to be dropped.
> > > Unfortunately, even after the machine recovers from the low memory
> > > condition, the page continues to be used by the page_frag allocator,
> > > so all allocations from this page will continue to be dropped.
> > > 
> > > Fix this by freeing and re-allocating the page instead of recycling it.
> > > 
> > > Reported-by: Dongli Zhang 
> > > Cc: Aruna Ramakrishna 
> > > Cc: Bert Barbe 
> > > Cc: Rama Nichanamatlu 
> > > Cc: Venkat Venkatsubra 
> > > Cc: Manjunath Patil 
> > > Cc: Joe Jin 
> > > Cc: SRINIVAS 
> > > Cc: sta...@vger.kernel.org
> > > Fixes: 79930f5892e ("net: do not deplete pfmemalloc reserve")
> > 
> > Your patch looks fine, although this Fixes: tag seems incorrect.
> > 
> > 79930f5892e ("net: do not deplete pfmemalloc reserve") was propagating
> > the page pfmemalloc status into the skb, and seems correct to me.
> > 
> > The bug was the page_frag_alloc() was keeping a problematic page for
> > an arbitrary period of time ?
> 
> Isn't this the commit which unmasks the problem, though?  I don't think
> it's the buggy commit, but if your tree doesn't have 79930f5892e, then
> you don't need this patch.
> 
> Or are you saying the problem dates back all the way to
> c93bdd0e03e8 ("netvm: allow skb allocation to use PFMEMALLOC reserves")
> 
> > > + if (nc->pfmemalloc) {
> > 
> > if (unlikely(nc->pfmemalloc)) {
> 
> ACK.  Will make the change once we've settled on an appropriate Fixes tag.

Which commit should I claim this fixes?


Re: [PATCH] page_frag: Recover from memory pressure

2020-11-09 Thread Eric Dumazet



On 11/9/20 3:32 PM, Matthew Wilcox wrote:
> On Thu, Nov 05, 2020 at 02:02:24PM +, Matthew Wilcox wrote:
>> On Thu, Nov 05, 2020 at 02:21:25PM +0100, Eric Dumazet wrote:
>>> On 11/5/20 5:21 AM, Matthew Wilcox (Oracle) wrote:
>>>> When the machine is under extreme memory pressure, the page_frag allocator
>>>> signals this to the networking stack by marking allocations with the
>>>> 'pfmemalloc' flag, which causes non-essential packets to be dropped.
>>>> Unfortunately, even after the machine recovers from the low memory
>>>> condition, the page continues to be used by the page_frag allocator,
>>>> so all allocations from this page will continue to be dropped.
>>>>
>>>> Fix this by freeing and re-allocating the page instead of recycling it.
>>>>
>>>> Reported-by: Dongli Zhang 
>>>> Cc: Aruna Ramakrishna 
>>>> Cc: Bert Barbe 
>>>> Cc: Rama Nichanamatlu 
>>>> Cc: Venkat Venkatsubra 
>>>> Cc: Manjunath Patil 
>>>> Cc: Joe Jin 
>>>> Cc: SRINIVAS 
>>>> Cc: sta...@vger.kernel.org
>>>> Fixes: 79930f5892e ("net: do not deplete pfmemalloc reserve")
>>>
>>> Your patch looks fine, although this Fixes: tag seems incorrect.
>>>
>>> 79930f5892e ("net: do not deplete pfmemalloc reserve") was propagating
>>> the page pfmemalloc status into the skb, and seems correct to me.
>>>
>>> The bug was the page_frag_alloc() was keeping a problematic page for
>>> an arbitrary period of time ?
>>
>> Isn't this the commit which unmasks the problem, though?  I don't think
>> it's the buggy commit, but if your tree doesn't have 79930f5892e, then
>> you don't need this patch.
>>
>> Or are you saying the problem dates back all the way to
>> c93bdd0e03e8 ("netvm: allow skb allocation to use PFMEMALLOC reserves")
>>
>>>> +  if (nc->pfmemalloc) {
>>>
>>> if (unlikely(nc->pfmemalloc)) {
>>
>> ACK.  Will make the change once we've settled on an appropriate Fixes tag.
> 
> Which commit should I claim this fixes?

Hmm, no big deal, lets not waste time on tracking precise bug origin.



Re: [PATCH net] net: udp: fix Fast/frag0 UDP GRO

2020-11-09 Thread Willem de Bruijn
On Sat, Nov 7, 2020 at 8:11 PM Alexander Lobakin  wrote:
>
> While testing UDP GSO fraglists forwarding through driver that uses
> Fast GRO (via napi_gro_frags()), I was observing lots of out-of-order
> iperf packets:
>
> [ ID] Interval   Transfer Bitrate Jitter
> [SUM]  0.0-40.0 sec  12106 datagrams received out-of-order
>
> Simple switch to napi_gro_receive() or any other method without frag0
> shortcut completely resolved them.
>
> I've found that UDP GRO uses udp_hdr(skb) in its .gro_receive()
> callback. While it's probably OK for non-frag0 paths (when all
> headers or even the entire frame are already in skb->data), this
> inline points to junk when using Fast GRO (napi_gro_frags() or
> napi_gro_receive() with only Ethernet header in skb->data and all
> the rest in shinfo->frags) and breaks GRO packet compilation and
> the packet flow itself.
> To support both modes, skb_gro_header_fast() + skb_gro_header_slow()
> are typically used. UDP even has an inline helper that makes use of
> them, udp_gro_udphdr(). Use that instead of troublemaking udp_hdr()
> to get rid of the out-of-order delivers.
>
> Present since the introduction of plain UDP GRO in 5.0-rc1.
>
> Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.")
> Signed-off-by: Alexander Lobakin 
> ---
>  net/ipv4/udp_offload.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index e67a66fbf27b..13740e9fe6ec 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -366,7 +366,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff 
> *skb,
>  static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
>struct sk_buff *skb)
>  {
> -   struct udphdr *uh = udp_hdr(skb);
> +   struct udphdr *uh = udp_gro_udphdr(skb);
> struct sk_buff *pp = NULL;
> struct udphdr *uh2;
> struct sk_buff *p;

Good catch. skb_gro_header_slow may fail and return NULL. Need to
check that before dereferencing uh below in

/* requires non zero csum, for symmetry with GSO */
if (!uh->check) {
NAPI_GRO_CB(skb)->flush = 1;
return NULL;
}


[PATCH] net: dsa: mv88e6xxx: Fix memleak in mv88e6xxx_region_atu_snapshot

2020-11-09 Thread zhangxiaoxu
When mv88e6xxx_fid_map() returns an error, we fail to free the table.

Fix it.

Fixes: bfb255428966 ("net: dsa: mv88e6xxx: Add devlink regions")
Reported-by: Hulk Robot 
Signed-off-by: zhangxiaoxu 
---
 drivers/net/dsa/mv88e6xxx/devlink.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6xxx/devlink.c 
b/drivers/net/dsa/mv88e6xxx/devlink.c
index 10cd1bfd81a0..ade04c036fd9 100644
--- a/drivers/net/dsa/mv88e6xxx/devlink.c
+++ b/drivers/net/dsa/mv88e6xxx/devlink.c
@@ -393,8 +393,10 @@ static int mv88e6xxx_region_atu_snapshot(struct devlink 
*dl,
mv88e6xxx_reg_lock(chip);
 
err = mv88e6xxx_fid_map(chip, fid_bitmap);
-   if (err)
+   if (err) {
+   kfree(table);
goto out;
+   }
 
while (1) {
fid = find_next_bit(fid_bitmap, MV88E6XXX_N_FID, fid + 1);
-- 
2.25.4



Re: [PATCH] net: dsa: mv88e6xxx: Fix memleak in mv88e6xxx_region_atu_snapshot

2020-11-09 Thread Andrew Lunn
On Mon, Nov 09, 2020 at 09:44:16AM -0500, zhangxiaoxu wrote:
> When mv88e6xxx_fid_map return error, we lost free the table.
> 
> Fix it.
> 
> Fixes: bfb255428966 ("net: dsa: mv88e6xxx: Add devlink regions")
> Reported-by: Hulk Robot 
> Signed-off-by: zhangxiaoxu 

Reviewed-by: Andrew Lunn 

Andrew


Re: [PATCH] net: tcp: ratelimit warnings in tcp_recvmsg

2020-11-09 Thread Menglong Dong
On Mon, Nov 9, 2020 at 9:36 PM Eric Dumazet  wrote:
>
> I do not think this patch is useful. That is simply code churn.
>
> Can you trigger the WARN() in the latest upstream version ?
> If yes this is a serious bug that needs urgent attention.
>
> Make sure you have backported all needed fixes into your kernel, if
> you get this warning on a non pristine kernel.

Theoretically, this WARN() shouldn't be triggered in any branches.
Somehow, it just happened in kernel v3.10. This really confused me. I
wasn't able to keep tracing it, as it is a product environment.

I notice that the code for tcp skb receiving didn't change much
between v3.10 and the latest upstream version, so I guess this WARN()
can be triggered in the latest version too.

If something is fixed and this WARN() won't be triggered, just ignore me.

Cheers,
Menglong Dong


Re: [PATCH v5 net-next 3/3] net/sched: act_frag: add implict packet fragment support.

2020-11-09 Thread Marcelo Ricardo Leitner
On Mon, Nov 09, 2020 at 03:24:37PM +0200, Vlad Buslov wrote:
> On Sun 08 Nov 2020 at 01:30, we...@ucloud.cn wrote:
...
> > @@ -974,9 +974,22 @@ config NET_ACT_TUNNEL_KEY
> >   To compile this code as a module, choose M here: the
> >   module will be called act_tunnel_key.
> >  
> > +config NET_ACT_FRAG
> > +   tristate "Packet fragmentation"
> > +   depends on NET_CLS_ACT
> > +   help
> > + Say Y here to allow fragmenting big packets when outputting
> > + with the mirred action.
> > +
> > + If unsure, say N.
> > +
> > + To compile this code as a module, choose M here: the
> > + module will be called act_frag.
> > +
> 
> Just wondering, what is the motivation for putting the frag code into
> standalone module? It doesn't implement usual act_* interface and is not
> user-configurable. To me it looks like functionality that belongs to
> act_api. Am I missing something?

It's the way we found so far for not "polluting" mirred/tc with L3
functionality, per Cong's feedbacks on previous attempts. As for why
not act_api, this is not some code that other actions can just re-use
and that file is already quite big, so I thought act_frag would be
better to keep it isolated/contained.

If act_frag is confusing, then maybe act_mirred_frag? It is a mirred
plugin now, after all.

...
> > +int tcf_set_xmit_hook(int (*xmit_hook)(struct sk_buff *skb,
> > +  int (*xmit)(struct sk_buff *skb)))
> > +{
> > +   if (!tcf_xmit_hook_enabled())
> > +   xchg(&tcf_xmit_hook, xmit_hook);
> 
> Marcelo, why did you suggest to use atomic operations to change
> tcf_xmit_hook variable? It is not obvious to me after reading the code.

I thought as a minimal way to not have problems on module removal, but
your comment below proves it is not right/enough. :-)

> 
> > +   else if (xmit_hook != tcf_xmit_hook)
> > +   return -EBUSY;
> > +
> > +   tcf_inc_xmit_hook();
> > +
> > +   return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(tcf_set_xmit_hook);
> > +
> > +void tcf_clear_xmit_hook(void)
> > +{
> > +   tcf_dec_xmit_hook();
> > +
> > +   if (!tcf_xmit_hook_enabled())
> > +   xchg(&tcf_xmit_hook, NULL);
> > +}
> > +EXPORT_SYMBOL_GPL(tcf_clear_xmit_hook);
> > +
> > +int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff 
> > *skb))
> > +{
> > +   if (tcf_xmit_hook_enabled())
> 
> Okay, so what happens here if tcf_xmit_hook is disabled concurrently? If
> we get here from some rule that doesn't involve act_ct but uses
> act_mirred and act_ct is concurrently removed decrementing last
> reference to static branch and setting tcf_xmit_hook to NULL?

Yeah.. good point. Thinking further now, what about using RCU for the
hook? AFAICT it can cover the synchronization needed when clearing the
pointer, tcf_set_xmit_hook() should do a module_get() and
tcf_clear_xmit_hook() can delay a module_put(act_frag) as needed with
call_rcu.

I see tcf_mirred_act is already calling rcu_dereference_bh(), so
it's already protected by rcu read here and calling tcf_xmit_hook()
with xmit pointer should be fine. WDYT?

> 
> > +   return tcf_xmit_hook(skb, xmit);
> > +   else
> > +   return xmit(skb);
> > +}
> > +EXPORT_SYMBOL_GPL(tcf_dev_queue_xmit);


Re: [PATCH v4 5/7] can: update documentation for DLC usage in Classical CAN

2020-11-09 Thread Vincent MAILHOL
On Mon. 9 Nov 2020 at 19:26, Oliver Hartkopp wrote:
>
> The extension of struct can_frame with the len8_dlc element and the
> can_dlc naming issue required an update of the documentation.
>
> Additionally introduce the term 'Classical CAN' which has been established
> by CAN in Automation to separate the original CAN2.0 A/B from CAN FD.
>
> Updated some data structures and flags.
>
> Signed-off-by: Oliver Hartkopp 
> ---
>  Documentation/networking/can.rst | 68 
>  1 file changed, 52 insertions(+), 16 deletions(-)
>
> diff --git a/Documentation/networking/can.rst 
> b/Documentation/networking/can.rst
> index ff05cbd05e0d..e17c6427bb3a 100644
> --- a/Documentation/networking/can.rst
> +++ b/Documentation/networking/can.rst
> @@ -226,24 +226,40 @@ interface (which is different from TCP/IP due to 
> different addressing
>  the socket, you can read(2) and write(2) from/to the socket or use
>  send(2), sendto(2), sendmsg(2) and the recv* counterpart operations
>  on the socket as usual. There are also CAN specific socket options
>  described below.
>
> -The basic CAN frame structure and the sockaddr structure are defined
> -in include/linux/can.h:
> +The Classical CAN frame structure (aka CAN 2.0B), the CAN FD frame structure
> +and the sockaddr structure are defined in include/linux/can.h:
>
>  .. code-block:: C
>
>  struct can_frame {
>  canid_t can_id;  /* 32 bit CAN_ID + EFF/RTR/ERR flags */
> -__u8can_dlc; /* frame payload length in byte (0 .. 8) */
> +union {
> +/* CAN frame payload length in byte (0 .. CAN_MAX_DLEN)
> + * was previously named can_dlc so we need to carry that
> + * name for legacy support
> + */
> +__u8 len;
> +__u8 can_dlc; /* deprecated */
> +};
>  __u8__pad;   /* padding */
>  __u8__res0;  /* reserved / padding */
> -__u8__res1;  /* reserved / padding */
> +__u8len8_dlc; /* optional DLC for 8 byte payload length (9 
> .. 15) */
>  __u8data[8] __attribute__((aligned(8)));
>  };
>
> +Remark: The len element contains the payload length in bytes and should be
> +used instead of can_dlc. The deprecated can_dlc was misleadingly named as
> +it always contained the plain payload length in bytes and not the so called
> +'data length code' (DLC).
> +
> +To pass the raw DLC from/to a Classical CAN network device the len8_dlc
> +element can contain values 9 .. 15 when the len element is 8 (the real
> +payload length for all DLC values greater or equal to 8).

The "Classical CAN network device" part could make the reader
misunderstand that FD capable controllers can not handle Classical CAN
frames with DLC greater than 8. All the CAN-FD controllers I am aware
of can emit both Classical and FD frames. On the contrary, some
Classical CAN controllers might not support sending DLCs greater than
8. Propose to add the nuance that this depends on the device property:

 +To pass the raw DLC from/to a capable network device
 +(c.f. cc-len8-dlc CAN device property), the len8_dlc element can
 +contain values 9 .. 15 when the len element is 8 (the real payload
 +length for all DLC values greater or equal to 8).

> +
>  The alignment of the (linear) payload data[] to a 64bit boundary
>  allows the user to define their own structs and unions to easily access
>  the CAN payload. There is no given byteorder on the CAN bus by
>  default. A read(2) system call on a CAN_RAW socket transfers a
>  struct can_frame to the user space.
> @@ -258,10 +274,27 @@ PF_PACKET socket, that also binds to a specific 
> interface:
>  int can_ifindex;
>  union {
>  /* transport protocol class address info (e.g. ISOTP) */
>  struct { canid_t rx_id, tx_id; } tp;
>
> +/* J1939 address information */
> +struct {
> +/* 8 byte name when using dynamic addressing */
> +__u64 name;
> +
> +/* pgn:
> + * 8 bit: PS in PDU2 case, else 0
> + * 8 bit: PF
> + * 1 bit: DP
> + * 1 bit: reserved
> + */
> +__u32 pgn;
> +
> +/* 1 byte address */
> +__u8 addr;
> +} j1939;
> +
>  /* reserved for future CAN protocols address information 
> */
>  } can_addr;
>  };

This looks like some J1939 code. Did you mix your patches?

>  To determine the interface index an appropriate ioctl() has to
> @@ -369,11 +402,11 @@ bitrates for the arbitration phase and the payload 
> phase of the CAN FD frame
>  and up to 6

Re: [PATCH v5 net-next 3/3] net/sched: act_frag: add implict packet fragment support.

2020-11-09 Thread wenxu


在 2020/11/9 21:24, Vlad Buslov 写道:
> On Sun 08 Nov 2020 at 01:30, we...@ucloud.cn wrote:
>> From: wenxu 
>>
>> Currently kernel tc subsystem can do conntrack in act_ct. But when several
>> fragment packets go through the act_ct, function tcf_ct_handle_fragments
>> will defrag the packets to a big one. But the last action will redirect
>> mirred to a device which maybe lead the reassembly big packet over the mtu
>> of target device.
>>
>> This patch add support for a xmit hook to mirred, that gets executed before
>> xmiting the packet. Then, when act_ct gets loaded, it configs that hook.
>> The frag xmit hook maybe reused by other modules.
>>
>> Signed-off-by: wenxu 
>> ---
>> v2: Fix the crash for act_frag module without load
>> v3: modify the kconfig describe and put tcf_xmit_hook_is_enabled
>> in the tcf_dev_queue_xmit, and xchg atomic for tcf_xmit_hook
>> v4: using skb_protocol and fix line length exceeds 80 columns
>> v5: no change
>>
>>  include/net/act_api.h  |  16 +
>>  net/sched/Kconfig  |  13 
>>  net/sched/Makefile |   1 +
>>  net/sched/act_api.c|  51 +++
>>  net/sched/act_ct.c |   7 +++
>>  net/sched/act_frag.c   | 164 
>> +
>>  net/sched/act_mirred.c |   2 +-
>>  7 files changed, 253 insertions(+), 1 deletion(-)
>>  create mode 100644 net/sched/act_frag.c
>>
>> diff --git a/include/net/act_api.h b/include/net/act_api.h
>> index 8721492..403a618 100644
>> --- a/include/net/act_api.h
>> +++ b/include/net/act_api.h
>> @@ -239,6 +239,22 @@ int tcf_action_check_ctrlact(int action, struct 
>> tcf_proto *tp,
>>   struct netlink_ext_ack *newchain);
>>  struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action,
>>   struct tcf_chain *newchain);
>> +
>> +int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff 
>> *skb));
>> +int tcf_set_xmit_hook(int (*xmit_hook)(struct sk_buff *skb,
>> +   int (*xmit)(struct sk_buff *skb)));
>> +void tcf_clear_xmit_hook(void);
>> +
>> +#if IS_ENABLED(CONFIG_NET_ACT_FRAG)
>> +int tcf_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff 
>> *skb));
>> +#else
>> +static inline int tcf_frag_xmit_hook(struct sk_buff *skb,
>> + int (*xmit)(struct sk_buff *skb))
>> +{
>> +return 0;
>> +}
>> +#endif
>> +
>>  #endif /* CONFIG_NET_CLS_ACT */
>>  
>>  static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
>> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
>> index a3b37d8..9a240c7 100644
>> --- a/net/sched/Kconfig
>> +++ b/net/sched/Kconfig
>> @@ -974,9 +974,22 @@ config NET_ACT_TUNNEL_KEY
>>To compile this code as a module, choose M here: the
>>module will be called act_tunnel_key.
>>  
>> +config NET_ACT_FRAG
>> +tristate "Packet fragmentation"
>> +depends on NET_CLS_ACT
>> +help
>> + Say Y here to allow fragmenting big packets when outputting
>> + with the mirred action.
>> +
>> +  If unsure, say N.
>> +
>> +  To compile this code as a module, choose M here: the
>> +  module will be called act_frag.
>> +
> Just wondering, what is the motivation for putting the frag code into
> standalone module? It doesn't implement usual act_* interface and is not
> user-configurable. To me it looks like functionality that belongs to
> act_api. Am I missing something?

The fragment operation is a single L3 action.

So we put it in a single module. Maybe it is not proper to put it in
act_api directly.

>>  config NET_ACT_CT
>>  tristate "connection tracking tc action"
>>  depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE
>> +depends on NET_ACT_FRAG
>>  help
>>Say Y here to allow sending the packets to conntrack module.
>>  
>> diff --git a/net/sched/Makefile b/net/sched/Makefile
>> index 66bbf9a..c146186 100644
>> --- a/net/sched/Makefile
>> +++ b/net/sched/Makefile
>> @@ -29,6 +29,7 @@ obj-$(CONFIG_NET_IFE_SKBMARK)  += act_meta_mark.o
>>  obj-$(CONFIG_NET_IFE_SKBPRIO)   += act_meta_skbprio.o
>>  obj-$(CONFIG_NET_IFE_SKBTCINDEX)+= act_meta_skbtcindex.o
>>  obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
>> +obj-$(CONFIG_NET_ACT_FRAG)  += act_frag.o
>>  obj-$(CONFIG_NET_ACT_CT)+= act_ct.o
>>  obj-$(CONFIG_NET_ACT_GATE)  += act_gate.o
>>  obj-$(CONFIG_NET_SCH_FIFO)  += sch_fifo.o
>> diff --git a/net/sched/act_api.c b/net/sched/act_api.c
>> index f66417d..e7b501c 100644
>> --- a/net/sched/act_api.c
>> +++ b/net/sched/act_api.c
>> @@ -22,6 +22,57 @@
>>  #include 
>>  #include 
>>  
>> +static int (*tcf_xmit_hook)(struct sk_buff *skb,
>> +int (*xmit)(struct sk_buff *skb));
>> +static DEFINE_STATIC_KEY_FALSE(tcf_xmit_hook_in_use);
>> +
>> +static void tcf_inc_xmit_hook(void)
>> +{
>> +static_branch_inc(&tcf_xmit_hook_in_use);
>> +}
>> +
>> +static void tcf_dec_xmit_hook(void)
>> +{
>>

[PATCH v2 2/2] net: fec: Fix reference count leak in fec series ops

2020-11-09 Thread Zhang Qilong
pm_runtime_get_sync() will increment pm usage at first and it will
resume the device later. If runtime of the device has error or
device is in inaccessible state(or other error state), resume
operation will fail. If we do not call put operation to decrease
the reference, it will result in reference count leak. Moreover,
this device cannot enter the idle state and always stay busy or other
non-idle state later. So we fixed it by replacing it with
pm_runtime_general_get.

Fixes: 8fff755e9f8d0 ("net: fec: Ensure clocks are enabled while using mdio 
bus")
Signed-off-by: Zhang Qilong 
---
 drivers/net/ethernet/freescale/fec_main.c | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c 
b/drivers/net/ethernet/freescale/fec_main.c
index d7919555250d..695720f8263f 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1808,7 +1808,7 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int 
mii_id, int regnum)
int ret = 0, frame_start, frame_addr, frame_op;
bool is_c45 = !!(regnum & MII_ADDR_C45);
 
-   ret = pm_runtime_get_sync(dev);
+   ret = pm_runtime_general_get(dev);
if (ret < 0)
return ret;
 
@@ -1867,11 +1867,9 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int 
mii_id, int regnum,
int ret, frame_start, frame_addr;
bool is_c45 = !!(regnum & MII_ADDR_C45);
 
-   ret = pm_runtime_get_sync(dev);
+   ret = pm_runtime_general_get(dev);
if (ret < 0)
return ret;
-   else
-   ret = 0;
 
if (is_c45) {
frame_start = FEC_MMFR_ST_C45;
@@ -2275,7 +2273,7 @@ static void fec_enet_get_regs(struct net_device *ndev,
u32 i, off;
int ret;
 
-   ret = pm_runtime_get_sync(dev);
+   ret = pm_runtime_general_get(dev);
if (ret < 0)
return;
 
@@ -2976,7 +2974,7 @@ fec_enet_open(struct net_device *ndev)
int ret;
bool reset_again;
 
-   ret = pm_runtime_get_sync(&fep->pdev->dev);
+   ret = pm_runtime_general_get(&fep->pdev->dev);
if (ret < 0)
return ret;
 
@@ -3770,7 +3768,7 @@ fec_drv_remove(struct platform_device *pdev)
struct device_node *np = pdev->dev.of_node;
int ret;
 
-   ret = pm_runtime_get_sync(&pdev->dev);
+   ret = pm_runtime_general_get(&pdev->dev);
if (ret < 0)
return ret;
 
-- 
2.25.4



[PATCH v2 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter

2020-11-09 Thread Zhang Qilong
In many cases, we need to check the return value of pm_runtime_get_sync(),
but doing so complicates the usage counter handling. Many callers forget
to decrease the usage counter when it fails. This has been discussed a
lot[0][1]. So we add a function that deals with the usage counter, for
better coding.

[0]https://lkml.org/lkml/2020/6/14/88
[1]https://patchwork.ozlabs.org/project/linux-tegra/patch/20200520095148.10995-1-dinghao@zju.edu.cn/
Signed-off-by: Zhang Qilong 
---
 include/linux/pm_runtime.h | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 4b708f4e8eed..6549ce764400 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -386,6 +386,36 @@ static inline int pm_runtime_get_sync(struct device *dev)
return __pm_runtime_resume(dev, RPM_GET_PUT);
 }
 
+/**
+ * pm_runtime_general_get - Bump up usage counter of a device and resume it.
+ * @dev: Target device.
+ *
+ * Increase runtime PM usage counter of @dev first, and carry out runtime-resume
+ * of it synchronously. If __pm_runtime_resume() returns a negative value (the
+ * device is in an error state), the usage counter is decreased before returning.
+ * If __pm_runtime_resume() returns a positive value, the device was already in
+ * the active state, and this wrapper returns zero instead.
+ *
+ * The possible return values of this function are zero or a negative value.
+ * zero:
+ *    - resume succeeded or the device was already active; the runtime PM
+ *      usage counter of @dev remains incremented.
+ * negative:
+ *    - failure; the runtime PM usage counter of @dev has been balanced.
+ */
+static inline int pm_runtime_general_get(struct device *dev)
+{
+   int ret = 0;
+
+   ret = __pm_runtime_resume(dev, RPM_GET_PUT);
+   if (ret < 0) {
+   pm_runtime_put_noidle(dev);
+   return ret;
+   }
+
+   return 0;
+}
+
 /**
  * pm_runtime_put - Drop device usage counter and queue up "idle check" if 0.
  * @dev: Target device.
-- 
2.25.4



[PATCH v2 0/2] Fix usage counter leak by adding a general sync ops

2020-11-09 Thread Zhang Qilong
In many cases, we need to check the return value of pm_runtime_get_sync(),
but doing so complicates the handling of the usage counter: many callers
forget to decrease the usage counter when the call fails. This has been
discussed a lot [0][1]. So add a function that takes care of the usage
counter on failure, to make callers simpler and easier to read. Then
replace pm_runtime_get_sync() with it in fec_main.c.

Zhang Qilong (2):
  PM: runtime: Add a general runtime get sync operation to deal with
usage counter
  net: fec: Fix reference count leak in fec series ops

 drivers/net/ethernet/freescale/fec_main.c | 12 -
 include/linux/pm_runtime.h| 30 +++
 2 files changed, 35 insertions(+), 7 deletions(-)

-- 
2.25.4



Re: [PATCH] IPv6: Set SIT tunnel hard_header_len to zero

2020-11-09 Thread Willem de Bruijn
On Mon, Nov 9, 2020 at 4:05 AM Oliver Herms
 wrote:
>
>
> On 04.11.20 20:52, Willem de Bruijn wrote:
>  Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.")
> >>>
> >>> How did you arrive at this SHA1?
> >> I think the legacy usage of hard_header_len in ipv6/sit.c was overseen in 
> >> c54419321455.
> >> Please correct me if I'm wrong.
> >
> > I don't see anything in that patch assign or modify hard_header_len.
> >
> It's not assigning or modifying it but changing expectations about how 
> dev->hard_header_len is to be used.
>
> The patch changed the MTU calculation from:
> mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
>
> to this:
> mtu = dst_mtu(&rt->dst) - dev->hard_header_len - sizeof(struct iphdr);
>
> Later it became this (in patch 23a3647; this is the current implementation):
> mtu = dst_mtu(&rt->dst) - dev->hard_header_len - sizeof(struct iphdr) - 
> tunnel_hlen;
>
> Apparently the initial usage of dev->hard_header_len was that it contains
> the length of all headers before the tunnel payload. c54419321455 changed
> this to assume that dev->hard_header_len does not contain the tunnel's
> outer IP header. Thus I think the bug was introduced by c54419321455.

And the only header in the case of SIT is that outer ip header. Got it, thanks.

Overly conservative MTU calculation is one issue. Packet sockets also
expect read/write link layer access with SOCK_RAW, which does not work
correctly for sit. I'm not sure that it ever did.

The chosen commit predates all stable trees, which is the most important point.

Acked-by: Willem de Bruijn 

Could ip6 tunnels have the same issue? In ip6_tnl_dev_init_gen,

dev->hard_header_len = LL_MAX_HEADER + t_hlen;


[PATCH net-next v4 02/15] net/smc: Use active link of the connection

2020-11-09 Thread Karsten Graul
From: Guvenc Gulce 

Use active link of the connection directly and not
via linkgroup array structure when obtaining link
data of the connection.

Signed-off-by: Guvenc Gulce 
Signed-off-by: Karsten Graul 
---
 net/smc/smc_diag.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index f15fca59b4b2..c2225231f679 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -160,17 +160,17 @@ static int __smc_diag_dump(struct sock *sk, struct 
sk_buff *skb,
!list_empty(&smc->conn.lgr->list)) {
struct smc_diag_lgrinfo linfo = {
.role = smc->conn.lgr->role,
-   .lnk[0].ibport = smc->conn.lgr->lnk[0].ibport,
-   .lnk[0].link_id = smc->conn.lgr->lnk[0].link_id,
+   .lnk[0].ibport = smc->conn.lnk->ibport,
+   .lnk[0].link_id = smc->conn.lnk->link_id,
};
 
memcpy(linfo.lnk[0].ibname,
   smc->conn.lgr->lnk[0].smcibdev->ibdev->name,
-  sizeof(smc->conn.lgr->lnk[0].smcibdev->ibdev->name));
+  sizeof(smc->conn.lnk->smcibdev->ibdev->name));
smc_gid_be16_convert(linfo.lnk[0].gid,
-smc->conn.lgr->lnk[0].gid);
+smc->conn.lnk->gid);
smc_gid_be16_convert(linfo.lnk[0].peer_gid,
-smc->conn.lgr->lnk[0].peer_gid);
+smc->conn.lnk->peer_gid);
 
if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0)
goto errout;
-- 
2.17.1



[PATCH net-next v4 04/15] net/smc: Add link counters for IB device ports

2020-11-09 Thread Karsten Graul
From: Guvenc Gulce 

Add link counters to the structure of the smc ib device, one counter per
ib port. Increase/decrease the counters as needed in the corresponding
routines.

Signed-off-by: Guvenc Gulce 
Signed-off-by: Karsten Graul 
---
 net/smc/smc_core.c | 13 +
 net/smc/smc_ib.h   |  2 ++
 2 files changed, 15 insertions(+)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 323a4b396be0..24d55b5b352b 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -63,6 +63,16 @@ static inline struct list_head *smc_lgr_list_head(struct 
smc_link_group *lgr,
return &smc_lgr_list.list;
 }
 
+static void smc_ibdev_cnt_inc(struct smc_link *lnk)
+{
+   atomic_inc(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
+}
+
+static void smc_ibdev_cnt_dec(struct smc_link *lnk)
+{
+   atomic_dec(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
+}
+
 static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
 {
/* client link group creation always follows the server link group
@@ -316,6 +326,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct 
smc_link *lnk,
lnk->link_idx = link_idx;
lnk->smcibdev = ini->ib_dev;
lnk->ibport = ini->ib_port;
+   smc_ibdev_cnt_inc(lnk);
lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
atomic_set(&lnk->conn_cnt, 0);
smc_llc_link_set_uid(lnk);
@@ -359,6 +370,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct 
smc_link *lnk,
 clear_llc_lnk:
smc_llc_link_clear(lnk, false);
 out:
+   smc_ibdev_cnt_dec(lnk);
put_device(&ini->ib_dev->ibdev->dev);
memset(lnk, 0, sizeof(struct smc_link));
lnk->state = SMC_LNK_UNUSED;
@@ -749,6 +761,7 @@ void smcr_link_clear(struct smc_link *lnk, bool log)
smc_ib_destroy_queue_pair(lnk);
smc_ib_dealloc_protection_domain(lnk);
smc_wr_free_link_mem(lnk);
+   smc_ibdev_cnt_dec(lnk);
put_device(&lnk->smcibdev->ibdev->dev);
smcibdev = lnk->smcibdev;
memset(lnk, 0, sizeof(struct smc_link));
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 2ce481187dd0..3b85360a473b 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -53,6 +53,8 @@ struct smc_ib_device {/* 
ib-device infos for smc */
atomic_tlnk_cnt;/* number of links on ibdev */
wait_queue_head_t   lnks_deleted;   /* wait 4 removal of all links*/
struct mutexmutex;  /* protect dev setup+cleanup */
+   atomic_tlnk_cnt_by_port[SMC_MAX_PORTS];
+   /* number of links per port */
 };
 
 struct smc_buf_desc;
-- 
2.17.1



[PATCH net-next v4 07/15] net/smc: Refactor the netlink reply processing routine

2020-11-09 Thread Karsten Graul
From: Guvenc Gulce 

Refactor the netlink reply processing routine so that
it provides sub functions for specific parts of the processing.

Signed-off-by: Guvenc Gulce 
Signed-off-by: Karsten Graul 
---
 net/smc/smc_diag.c | 218 +++--
 1 file changed, 133 insertions(+), 85 deletions(-)

diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index c2225231f679..44be723c97fe 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -69,35 +69,25 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg 
*r, struct sock *sk)
}
 }
 
-static int smc_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
-  struct smc_diag_msg *r,
-  struct user_namespace *user_ns)
+static bool smc_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
+   struct smc_diag_msg *r,
+   struct user_namespace *user_ns)
 {
-   if (nla_put_u8(skb, SMC_DIAG_SHUTDOWN, sk->sk_shutdown))
-   return 1;
+   if (nla_put_u8(skb, SMC_DIAG_SHUTDOWN, sk->sk_shutdown) < 0)
+   return false;
 
r->diag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
r->diag_inode = sock_i_ino(sk);
-   return 0;
+   return true;
 }
 
-static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
-  struct netlink_callback *cb,
-  const struct smc_diag_req *req,
-  struct nlattr *bc)
+static bool smc_diag_fill_base_struct(struct sock *sk, struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct smc_diag_msg *r)
 {
struct smc_sock *smc = smc_sk(sk);
-   struct smc_diag_fallback fallback;
struct user_namespace *user_ns;
-   struct smc_diag_msg *r;
-   struct nlmsghdr *nlh;
 
-   nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
-   cb->nlh->nlmsg_type, sizeof(*r), NLM_F_MULTI);
-   if (!nlh)
-   return -EMSGSIZE;
-
-   r = nlmsg_data(nlh);
smc_diag_msg_common_fill(r, sk);
r->diag_state = sk->sk_state;
if (smc->use_fallback)
@@ -107,89 +97,148 @@ static int __smc_diag_dump(struct sock *sk, struct 
sk_buff *skb,
else
r->diag_mode = SMC_DIAG_MODE_SMCR;
user_ns = sk_user_ns(NETLINK_CB(cb->skb).sk);
-   if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns))
-   goto errout;
+   if (!smc_diag_msg_attrs_fill(sk, skb, r, user_ns))
+   return false;
 
+   return true;
+}
+
+static bool smc_diag_fill_fallback(struct sock *sk, struct sk_buff *skb)
+{
+   struct smc_diag_fallback fallback;
+   struct smc_sock *smc = smc_sk(sk);
+
+   memset(&fallback, 0, sizeof(fallback));
fallback.reason = smc->fallback_rsn;
fallback.peer_diagnosis = smc->peer_diagnosis;
if (nla_put(skb, SMC_DIAG_FALLBACK, sizeof(fallback), &fallback) < 0)
+   return false;
+
+   return true;
+}
+
+static bool smc_diag_fill_conninfo(struct sock *sk, struct sk_buff *skb)
+{
+   struct smc_host_cdc_msg *local_tx, *local_rx;
+   struct smc_diag_conninfo cinfo;
+   struct smc_connection *conn;
+   struct smc_sock *smc;
+
+   smc = smc_sk(sk);
+   conn = &smc->conn;
+   local_tx = &conn->local_tx_ctrl;
+   local_rx = &conn->local_rx_ctrl;
+   memset(&cinfo, 0, sizeof(cinfo));
+   cinfo.token = conn->alert_token_local;
+   cinfo.sndbuf_size = conn->sndbuf_desc ? conn->sndbuf_desc->len : 0;
+   cinfo.rmbe_size = conn->rmb_desc ? conn->rmb_desc->len : 0;
+   cinfo.peer_rmbe_size = conn->peer_rmbe_size;
+
+   cinfo.rx_prod.wrap = local_rx->prod.wrap;
+   cinfo.rx_prod.count = local_rx->prod.count;
+   cinfo.rx_cons.wrap = local_rx->cons.wrap;
+   cinfo.rx_cons.count = local_rx->cons.count;
+
+   cinfo.tx_prod.wrap = local_tx->prod.wrap;
+   cinfo.tx_prod.count = local_tx->prod.count;
+   cinfo.tx_cons.wrap = local_tx->cons.wrap;
+   cinfo.tx_cons.count = local_tx->cons.count;
+
+   cinfo.tx_prod_flags = *(u8 *)&local_tx->prod_flags;
+   cinfo.tx_conn_state_flags = *(u8 *)&local_tx->conn_state_flags;
+   cinfo.rx_prod_flags = *(u8 *)&local_rx->prod_flags;
+   cinfo.rx_conn_state_flags = *(u8 *)&local_rx->conn_state_flags;
+
+   cinfo.tx_prep.wrap = conn->tx_curs_prep.wrap;
+   cinfo.tx_prep.count = conn->tx_curs_prep.count;
+   cinfo.tx_sent.wrap = conn->tx_curs_sent.wrap;
+   cinfo.tx_sent.count = conn->tx_curs_sent.count;
+   cinfo.tx_fin.wrap = conn->tx_curs_fin.wrap;
+   cinfo.tx_fin.count = conn->tx_curs_fin.count;
+
+   if (nla_put(skb, SMC_DIAG_CONNINFO, sizeof(cinfo), &cinfo) < 0)
+   return false;
+
+   return true;
+}
+
+static bool smc_diag_fill_lgrinfo(struct 

[PATCH net-next v4 01/15] net/smc: use helper smc_conn_abort() in listen processing

2020-11-09 Thread Karsten Graul
The helper smc_connect_abort() can be used by the listen processing
functions, too. And rename this helper to smc_conn_abort() to make the
purpose clearer.
No functional change.

Signed-off-by: Karsten Graul 
---
 net/smc/af_smc.c | 17 +
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 527185af7bf3..bc3e45289771 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -552,8 +552,7 @@ static int smc_connect_decline_fallback(struct smc_sock 
*smc, int reason_code,
return smc_connect_fallback(smc, reason_code);
 }
 
-/* abort connecting */
-static void smc_connect_abort(struct smc_sock *smc, int local_first)
+static void smc_conn_abort(struct smc_sock *smc, int local_first)
 {
if (local_first)
smc_lgr_cleanup_early(&smc->conn);
@@ -814,7 +813,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
 
return 0;
 connect_abort:
-   smc_connect_abort(smc, ini->first_contact_local);
+   smc_conn_abort(smc, ini->first_contact_local);
mutex_unlock(&smc_client_lgr_pending);
smc->connect_nonblock = 0;
 
@@ -893,7 +892,7 @@ static int smc_connect_ism(struct smc_sock *smc,
 
return 0;
 connect_abort:
-   smc_connect_abort(smc, ini->first_contact_local);
+   smc_conn_abort(smc, ini->first_contact_local);
mutex_unlock(&smc_server_lgr_pending);
smc->connect_nonblock = 0;
 
@@ -1320,10 +1319,7 @@ static void smc_listen_decline(struct smc_sock *new_smc, 
int reason_code,
   int local_first, u8 version)
 {
/* RDMA setup failed, switch back to TCP */
-   if (local_first)
-   smc_lgr_cleanup_early(&new_smc->conn);
-   else
-   smc_conn_free(&new_smc->conn);
+   smc_conn_abort(new_smc, local_first);
if (reason_code < 0) { /* error, no fallback possible */
smc_listen_out_err(new_smc);
return;
@@ -1429,10 +1425,7 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
/* Create send and receive buffers */
rc = smc_buf_create(new_smc, true);
if (rc) {
-   if (ini->first_contact_local)
-   smc_lgr_cleanup_early(&new_smc->conn);
-   else
-   smc_conn_free(&new_smc->conn);
+   smc_conn_abort(new_smc, ini->first_contact_local);
return (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB :
 SMC_CLC_DECL_MEM;
}
-- 
2.17.1



[PATCH net-next v4 10/15] net/smc: Introduce SMCR get link command

2020-11-09 Thread Karsten Graul
From: Guvenc Gulce 

Introduce the get link command, which loops through
all available links of all available link groups. It
uses the SMC-R linkgroup list as its entry point, not
the socket list, which makes linkgroup diagnosis
possible even when a linkgroup no longer contains
active connections.

Signed-off-by: Guvenc Gulce 
Signed-off-by: Karsten Graul 
---
 include/uapi/linux/smc_diag.h |  8 +
 net/smc/smc_diag.c| 62 ++-
 2 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h
index 6ae028344b6d..a57df0296aa4 100644
--- a/include/uapi/linux/smc_diag.h
+++ b/include/uapi/linux/smc_diag.h
@@ -4,6 +4,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -79,6 +80,7 @@ enum {
 /* SMC_DIAG_GET_LGR_INFO command extensions */
 enum {
SMC_DIAG_LGR_INFO_SMCR = 1,
+   SMC_DIAG_LGR_INFO_SMCR_LINK,
 };
 
 #define SMC_DIAG_MAX (__SMC_DIAG_MAX - 1)
@@ -129,6 +131,12 @@ struct smc_diag_linkinfo {
__u8 ibport;/* RDMA device port number */
__u8 gid[40];   /* local GID */
__u8 peer_gid[40];  /* peer GID */
+   /* Fields above used by legacy v1 code */
+   __u32 conn_cnt;
+   __u8 netdev[IFNAMSIZ];  /* ethernet device name */
+   __u8 link_uid[4];   /* unique link id */
+   __u8 peer_link_uid[4];  /* unique peer link id */
+   __u32 link_state;   /* link state */
 };
 
 struct smc_diag_lgrinfo {
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index c958b23843e6..9a41548d6263 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -20,6 +20,7 @@
 #include 
 
 #include "smc.h"
+#include "smc_ib.h"
 #include "smc_core.h"
 
 static const struct smc_diag_ops *smc_diag_ops;
@@ -205,6 +206,54 @@ static bool smc_diag_fill_dmbinfo(struct sock *sk, struct 
sk_buff *skb)
return true;
 }
 
+static int smc_diag_fill_lgr_link(struct smc_link_group *lgr,
+ struct smc_link *link,
+ struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct smc_diag_req_v2 *req)
+{
+   struct smc_diag_linkinfo link_info;
+   int dummy = 0, rc = 0;
+   struct nlmsghdr *nlh;
+
+   nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, MAGIC_SEQ_V2_ACK,
+   cb->nlh->nlmsg_type, 0, NLM_F_MULTI);
+
+   memset(&link_info, 0, sizeof(link_info));
+   link_info.link_state = link->state;
+   link_info.link_id = link->link_id;
+   link_info.conn_cnt = atomic_read(&link->conn_cnt);
+   link_info.ibport = link->ibport;
+
+   memcpy(link_info.link_uid, link->link_uid,
+  sizeof(link_info.link_uid));
+   snprintf(link_info.ibname, sizeof(link_info.ibname), "%s",
+link->ibname);
+   snprintf(link_info.netdev, sizeof(link_info.netdev), "%s",
+link->ndevname);
+   memcpy(link_info.peer_link_uid, link->peer_link_uid,
+  sizeof(link_info.peer_link_uid));
+
+   smc_gid_be16_convert(link_info.gid,
+link->gid);
+   smc_gid_be16_convert(link_info.peer_gid,
+link->peer_gid);
+
+   /* Just a command place holder to signal back the command reply type */
+   if (nla_put(skb, SMC_DIAG_GET_LGR_INFO, sizeof(dummy), &dummy) < 0)
+   goto errout;
+   if (nla_put(skb, SMC_DIAG_LGR_INFO_SMCR_LINK,
+   sizeof(link_info), &link_info) < 0)
+   goto errout;
+
+   nlmsg_end(skb, nlh);
+   return rc;
+
+errout:
+   nlmsg_cancel(skb, nlh);
+   return -EMSGSIZE;
+}
+
 static int smc_diag_fill_lgr(struct smc_link_group *lgr,
 struct sk_buff *skb,
 struct netlink_callback *cb,
@@ -240,7 +289,7 @@ static int smc_diag_handle_lgr(struct smc_link_group *lgr,
   struct smc_diag_req_v2 *req)
 {
struct nlmsghdr *nlh;
-   int rc = 0;
+   int i, rc = 0;
 
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, MAGIC_SEQ_V2_ACK,
cb->nlh->nlmsg_type, 0, NLM_F_MULTI);
@@ -252,6 +301,17 @@ static int smc_diag_handle_lgr(struct smc_link_group *lgr,
goto errout;
 
nlmsg_end(skb, nlh);
+
+   if ((req->cmd_ext & (1 << (SMC_DIAG_LGR_INFO_SMCR_LINK - 1 {
+   for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+   if (!smc_link_usable(&lgr->lnk[i]))
+   continue;
+   rc = smc_diag_fill_lgr_link(lgr, &lgr->lnk[i], skb,
+   cb, req);
+   if (rc < 0)
+   goto errout;
+   }
+   }
return rc;
 
 errout:
-- 
2.17.1



[PATCH net-next v4 11/15] net/smc: Add SMC-D Linkgroup diagnostic support

2020-11-09 Thread Karsten Graul
From: Guvenc Gulce 

Deliver SMCD Linkgroup information via netlink based
diagnostic interface.

Signed-off-by: Guvenc Gulce 
Signed-off-by: Karsten Graul 
---
 include/uapi/linux/smc_diag.h |   7 +++
 net/smc/smc_core.c|   7 +++
 net/smc/smc_core.h|   2 +
 net/smc/smc_diag.c| 108 ++
 4 files changed, 124 insertions(+)

diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h
index a57df0296aa4..5a80172df757 100644
--- a/include/uapi/linux/smc_diag.h
+++ b/include/uapi/linux/smc_diag.h
@@ -81,6 +81,7 @@ enum {
 enum {
SMC_DIAG_LGR_INFO_SMCR = 1,
SMC_DIAG_LGR_INFO_SMCR_LINK,
+   SMC_DIAG_LGR_INFO_SMCD,
 };
 
 #define SMC_DIAG_MAX (__SMC_DIAG_MAX - 1)
@@ -155,6 +156,12 @@ struct smcd_diag_dmbinfo { /* SMC-D Socket 
internals */
__aligned_u64   my_gid; /* My GID */
__aligned_u64   token;  /* Token of DMB */
__aligned_u64   peer_token; /* Token of remote DMBE */
+   /* Fields above used by legacy v1 code */
+   __u8pnet_id[SMC_MAX_PNETID_LEN]; /* Pnet ID */
+   __u32   conns_num;  /* Number of connections */
+   __u16   chid;   /* Linkgroup CHID */
+   __u8vlan_id;/* Linkgroup vlan id */
+   struct smc_diag_v2_lgr_info v2_lgr_info; /* SMCv2 info */
 };
 
 struct smc_diag_lgr {
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 2dcb1208f55e..37cc754485f0 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -214,6 +214,11 @@ static void smc_lgr_unregister_conn(struct smc_connection 
*conn)
conn->lgr = NULL;
 }
 
+static struct smcd_dev_list *smc_get_smcd_dev_list(void)
+{
+   return &smcd_dev_list;
+}
+
 static struct smc_lgr_list *smc_get_lgr_list(void)
 {
return &smc_lgr_list;
@@ -221,6 +226,8 @@ static struct smc_lgr_list *smc_get_lgr_list(void)
 
 static const struct smc_diag_ops smc_diag_ops = {
.get_lgr_list   = smc_get_lgr_list,
+   .get_smcd_devices   = smc_get_smcd_dev_list,
+   .get_chid   = smc_ism_get_chid,
 };
 
 const struct smc_diag_ops *smc_get_diag_ops(void)
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 09ff2011dd78..fb1f63f5e681 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -23,6 +23,8 @@
 /* Functions which are needed for diagnostic purposes by smc_diag module */
 struct smc_diag_ops {
struct smc_lgr_list *(*get_lgr_list)(void);
+   struct smcd_dev_list *(*get_smcd_devices)(void);
+   u16 (*get_chid)(struct smcd_dev *smcd);
 };
 
 struct smc_lgr_list {  /* list of link group definition */
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 9a41548d6263..a644e2299dbc 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -21,6 +21,7 @@
 
 #include "smc.h"
 #include "smc_ib.h"
+#include "smc_ism.h"
 #include "smc_core.h"
 
 static const struct smc_diag_ops *smc_diag_ops;
@@ -254,6 +255,53 @@ static int smc_diag_fill_lgr_link(struct smc_link_group 
*lgr,
return -EMSGSIZE;
 }
 
+static int smc_diag_fill_smcd_lgr(struct smc_link_group *lgr,
+ struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct smc_diag_req_v2 *req)
+{
+   struct smcd_diag_dmbinfo smcd_lgr;
+   struct nlmsghdr *nlh;
+   int dummy = 0;
+   int rc = 0;
+
+   nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, MAGIC_SEQ_V2_ACK,
+   cb->nlh->nlmsg_type, 0, NLM_F_MULTI);
+   if (!nlh)
+   return -EMSGSIZE;
+
+   memset(&smcd_lgr, 0, sizeof(smcd_lgr));
+   memcpy(&smcd_lgr.linkid, lgr->id, sizeof(lgr->id));
+   smcd_lgr.conns_num = lgr->conns_num;
+   smcd_lgr.vlan_id = lgr->vlan_id;
+   smcd_lgr.peer_gid = lgr->peer_gid;
+   smcd_lgr.my_gid = lgr->smcd->local_gid;
+   smcd_lgr.chid = smc_diag_ops->get_chid(lgr->smcd);
+   memcpy(&smcd_lgr.v2_lgr_info.negotiated_eid, lgr->negotiated_eid,
+  sizeof(smcd_lgr.v2_lgr_info.negotiated_eid));
+   memcpy(&smcd_lgr.v2_lgr_info.peer_hostname, lgr->peer_hostname,
+  sizeof(smcd_lgr.v2_lgr_info.peer_hostname));
+   smcd_lgr.v2_lgr_info.peer_os = lgr->peer_os;
+   smcd_lgr.v2_lgr_info.peer_smc_release = lgr->peer_smc_release;
+   smcd_lgr.v2_lgr_info.smc_version = lgr->smc_version;
+   snprintf(smcd_lgr.pnet_id, sizeof(smcd_lgr.pnet_id), "%s",
+lgr->smcd->pnetid);
+
+   /* Just a command place holder to signal back the command reply type */
+   if (nla_put(skb, SMC_DIAG_GET_LGR_INFO, sizeof(dummy), &dummy) < 0)
+   goto errout;
+
+   if (nla_put(skb, SMC_DIAG_LGR_INFO_SMCD,
+   sizeof(smcd_lgr), &smcd_lgr) < 0)
+   goto errout;
+
+   nlmsg_end(skb, nlh);
+   return rc;
+errout:
+   nlmsg_cancel(skb, nlh);

[PATCH net-next v4 09/15] net/smc: Introduce SMCR get linkgroup command

2020-11-09 Thread Karsten Graul
From: Guvenc Gulce 

Introduce the get linkgroup command, which loops through
all available SMCR linkgroups. It uses the SMC-R linkgroup
list as its entry point, not the socket list, which makes
linkgroup diagnosis possible even when a linkgroup no
longer contains active connections.

Signed-off-by: Guvenc Gulce 
Signed-off-by: Karsten Graul 
---
 include/net/smc.h |  2 +-
 include/uapi/linux/smc.h  |  5 ++
 include/uapi/linux/smc_diag.h | 43 +
 net/smc/smc.h |  4 +-
 net/smc/smc_core.c| 15 ++
 net/smc/smc_core.h|  7 ++-
 net/smc/smc_diag.c| 91 +++
 7 files changed, 162 insertions(+), 5 deletions(-)

diff --git a/include/net/smc.h b/include/net/smc.h
index e441aa97ad61..59d25dcb8e92 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -10,8 +10,8 @@
  */
 #ifndef _SMC_H
 #define _SMC_H
+#include 
 
-#define SMC_MAX_PNETID_LEN 16  /* Max. length of PNET id */
 
 struct smc_hashinfo {
rwlock_t lock;
diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h
index 0e11ca421ca4..635e2c2aeac5 100644
--- a/include/uapi/linux/smc.h
+++ b/include/uapi/linux/smc.h
@@ -3,6 +3,7 @@
  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
  *  Definitions for generic netlink based configuration of an SMC-R PNET table
+ *  Definitions for SMC Linkgroup and Devices.
  *
  *  Copyright IBM Corp. 2016
  *
@@ -33,4 +34,8 @@ enum {/* SMC PNET Table 
commands */
 #define SMCR_GENL_FAMILY_NAME  "SMC_PNETID"
 #define SMCR_GENL_FAMILY_VERSION   1
 
+#define SMC_MAX_PNETID_LEN 16 /* Max. length of PNET id */
+#define SMC_LGR_ID_SIZE4
+#define SMC_MAX_HOSTNAME_LEN   32 /* Max length of hostname */
+#define SMC_MAX_EID_LEN32 /* Max length of eid */
 #endif /* _UAPI_LINUX_SMC_H */
diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h
index 236c1c52d562..6ae028344b6d 100644
--- a/include/uapi/linux/smc_diag.h
+++ b/include/uapi/linux/smc_diag.h
@@ -4,8 +4,10 @@
 
 #include 
 #include 
+#include 
 #include 
 
+#define SMC_DIAG_EXTS_PER_CMD 16
 /* Sequence numbers */
 enum {
MAGIC_SEQ = 123456,
@@ -21,6 +23,17 @@ struct smc_diag_req {
struct inet_diag_sockid id;
 };
 
+/* Request structure v2 */
+struct smc_diag_req_v2 {
+   __u8diag_family;
+   __u8pad[2];
+   __u8diag_ext;   /* Query extended information */
+   struct inet_diag_sockid id;
+   __u32   cmd;
+   __u32   cmd_ext;
+   __u8cmd_val[8];
+};
+
 /* Base info structure. It contains socket identity (addrs/ports/cookie) based
  * on the internal clcsock, and more SMC-related socket data
  */
@@ -57,7 +70,19 @@ enum {
__SMC_DIAG_MAX,
 };
 
+/* V2 Commands */
+enum {
+   SMC_DIAG_GET_LGR_INFO = SMC_DIAG_EXTS_PER_CMD,
+   __SMC_DIAG_EXT_MAX,
+};
+
+/* SMC_DIAG_GET_LGR_INFO command extensions */
+enum {
+   SMC_DIAG_LGR_INFO_SMCR = 1,
+};
+
 #define SMC_DIAG_MAX (__SMC_DIAG_MAX - 1)
+#define SMC_DIAG_EXT_MAX (__SMC_DIAG_EXT_MAX - 1)
 
 /* SMC_DIAG_CONNINFO */
 
@@ -88,6 +113,14 @@ struct smc_diag_conninfo {
struct smc_diag_cursor  tx_fin; /* confirmed sent cursor */
 };
 
+struct smc_diag_v2_lgr_info {
+   __u8smc_version;/* SMC Version */
+   __u8peer_smc_release;   /* Peer SMC Version */
+   __u8peer_os;/* Peer operating system */
+   __u8negotiated_eid[SMC_MAX_EID_LEN]; /* Negotiated EID */
+   __u8peer_hostname[SMC_MAX_HOSTNAME_LEN]; /* Peer host */
+};
+
 /* SMC_DIAG_LINKINFO */
 
 struct smc_diag_linkinfo {
@@ -116,4 +149,14 @@ struct smcd_diag_dmbinfo { /* SMC-D Socket 
internals */
__aligned_u64   peer_token; /* Token of remote DMBE */
 };
 
+struct smc_diag_lgr {
+   __u8lgr_id[SMC_LGR_ID_SIZE]; /* Linkgroup identifier */
+   __u8lgr_role;   /* Linkgroup role */
+   __u8lgr_type;   /* Linkgroup type */
+   __u8pnet_id[SMC_MAX_PNETID_LEN]; /* Linkgroup pnet id */
+   __u8vlan_id;/* Linkgroup vland id */
+   __u32   conns_num;  /* Number of connections */
+   __u8reserved;   /* Reserved for future use */
+   struct smc_diag_v2_lgr_info v2_lgr_info; /* SMCv2 info */
+};
 #endif /* _UAPI_SMC_DIAG_H_ */
diff --git a/net/smc/smc.h b/net/smc/smc.h
index d65e15f0c944..d3bf81759285 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include  /* __aligned */
+#include 
 #include 
 
 #include "smc_ib.h"
@@ -29,9 +30,6 @@
 * devices
 */
 
-#define SMC_MAX_HOSTNAME_LEN   32
-#define SMC_

RE: [EXTERNAL] Re: [PATCH net 2/4] net:phy:smsc: expand documentation of clocks property

2020-11-09 Thread Badel, Laurent


> 

-
Eaton Industries Manufacturing GmbH ~ Registered place of business: Route de la 
Longeraie 7, 1110, Morges, Switzerland 

-

-Original Message-
> From: Florian Fainelli 
> Sent: Wednesday, November 04, 2020 5:02 PM
> To: Badel, Laurent ; Rob Herring
> 
> Cc: da...@davemloft.net; fugang.d...@nxp.com; and...@lunn.ch;
> lgirdw...@gmail.com; m.fel...@pengutronix.de; robh...@kernel.org;
> k...@kernel.org; li...@armlinux.org.uk; richard.leit...@skidata.com;
> netdev@vger.kernel.org; Quette, Arnaud ;
> p.za...@pengutronix.de; devicet...@vger.kernel.org; broo...@kernel.org;
> Heiner Kallweit 
> Subject: Re: [EXTERNAL] Re: [PATCH net 2/4] net:phy:smsc: expand
> documentation of clocks property
> 
> 
> 
> On 11/4/2020 4:11 AM, Badel, Laurent wrote:
> > >
> >
> > -
> > Eaton Industries Manufacturing GmbH ~ Registered place of business:
> > Route de la Longeraie 7, 1110, Morges, Switzerland
> >
> > -
> >
> > -Original Message-
> >> From: Rob Herring 
> >> Sent: Friday, October 30, 2020 8:19 PM
> >> To: Badel, Laurent 
> >> Cc: da...@davemloft.net; fugang.d...@nxp.com; and...@lunn.ch;
> >> lgirdw...@gmail.com; m.fel...@pengutronix.de; robh...@kernel.org;
> >> k...@kernel.org; li...@armlinux.org.uk; richard.leit...@skidata.com;
> >> netdev@vger.kernel.org; Quette, Arnaud ;
> >> p.za...@pengutronix.de; devicet...@vger.kernel.org;
> >> f.faine...@gmail.com; broo...@kernel.org; Heiner Kallweit
> >> 
> >> Subject: [EXTERNAL] Re: [PATCH net 2/4] net:phy:smsc: expand
> >> documentation of clocks property
> >>
> >> On Tue, 27 Oct 2020 23:27:42 +, Badel, Laurent wrote:
> >>> Subject: [PATCH net 2/4] net:phy:smsc: expand documentation of
> >>> clocks property
> >>>
> >>> Description: The ref clock is managed differently when added to the
> >>> DT entry for SMSC PHY. Thus, specify this more clearly in the
> documentation.
> >>>
> >>> Signed-off-by: Laurent Badel 
> >>> ---
> >>>  Documentation/devicetree/bindings/net/smsc-lan87xx.txt | 3 ++-
> >>>  1 file changed, 2 insertions(+), 1 deletion(-)
> >>>
> >>
> >> Acked-by: Rob Herring 
> >
> > Thank you very much.
> > I'm guessing perhaps I should re-send this as a single patch since
> > there are issues with the patch series?
> > I realize now that I should have splitted things differently.
> 
> There are several things with your patch series that make it very hard to be
> followed or to even know what is the latest version of your patch series. If
> you can resubmit everything targeting the 'net' tree along with a cover letter
> explaining the differences between v1 and v2 that would help. Please make
> sure that all of your patches reference the cover letter's Message-Id which is
> the default if you use git format-patch --cover-letter .
> 
> Thanks
> --
> Florian

I will make sure to give details as you suggested. Sorry for the trouble, and
thank you for your time reviewing.
Laurent


[PATCH net-next v4 00/15] net/smc: extend diagnostic netlink interface

2020-11-09 Thread Karsten Graul
Please apply the following patch series for smc to netdev's net-next tree.

This patch series refactors the current netlink API in smc_diag module
which is used for diagnostic purposes and extends the netlink API in a
backward compatible way so that the extended API can provide information
about SMC linkgroups, links and devices (both for SMC-R and SMC-D) and
can still work with the legacy netlink API.

Please note that patch 9 triggers a checkpatch warning because a comment
line was added using the style of the already existing comment block.

v2: In patch 10, add missing include to uapi header smc_diag.h.

v3: Apply code style recommendations from review comments.
Instead of using EXPORTs to allow the smc_diag module to access
data of the smc module, introduce struct smc_diag_ops and let
smc_diag access the required data using function pointers.

v4: Address checkpatch.pl warnings. Do not use static inline for
functions.

Guvenc Gulce (14):
  net/smc: Use active link of the connection
  net/smc: Add connection counters for links
  net/smc: Add link counters for IB device ports
  net/smc: Add diagnostic information to smc ib-device
  net/smc: Add diagnostic information to link structure
  net/smc: Refactor the netlink reply processing routine
  net/smc: Add ability to work with extended SMC netlink API
  net/smc: Introduce SMCR get linkgroup command
  net/smc: Introduce SMCR get link command
  net/smc: Add SMC-D Linkgroup diagnostic support
  net/smc: Add support for obtaining SMCD device list
  net/smc: Add support for obtaining SMCR device list
  net/smc: Refactor smc ism v2 capability handling
  net/smc: Add support for obtaining system information

Karsten Graul (1):
  net/smc: use helper smc_conn_abort() in listen processing

 include/net/smc.h |   2 +-
 include/uapi/linux/smc.h  |   8 +
 include/uapi/linux/smc_diag.h | 108 +
 net/smc/af_smc.c  |  29 +-
 net/smc/smc.h |   4 +-
 net/smc/smc_clc.c |   5 +
 net/smc/smc_clc.h |   1 +
 net/smc/smc_core.c|  72 +++-
 net/smc/smc_core.h|  26 +-
 net/smc/smc_diag.c| 788 ++
 net/smc/smc_ib.c  |  45 ++
 net/smc/smc_ib.h  |   5 +-
 net/smc/smc_ism.c |   8 +-
 net/smc/smc_ism.h |   5 +-
 net/smc/smc_pnet.c|   3 +
 15 files changed, 986 insertions(+), 123 deletions(-)

-- 
2.17.1



[PATCH net-next v4 14/15] net/smc: Refactor smc ism v2 capability handling

2020-11-09 Thread Karsten Graul
From: Guvenc Gulce 

Encapsulate the smc ism v2 capability boolean value
in a function for better information hiding.

Signed-off-by: Guvenc Gulce 
Signed-off-by: Karsten Graul 
---
 net/smc/af_smc.c  | 12 ++--
 net/smc/smc_ism.c |  8 +++-
 net/smc/smc_ism.h |  5 ++---
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index bc3e45289771..850e6df47a59 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -668,7 +668,7 @@ static int smc_find_proposal_devices(struct smc_sock *smc,
ini->smc_type_v1 = SMC_TYPE_N;
} /* else RDMA is supported for this connection */
}
-   if (smc_ism_v2_capable && smc_find_ism_v2_device_clnt(smc, ini))
+   if (smc_ism_is_v2_capable() && smc_find_ism_v2_device_clnt(smc, ini))
ini->smc_type_v2 = SMC_TYPE_N;
 
/* if neither ISM nor RDMA are supported, fallback */
@@ -920,7 +920,7 @@ static int smc_connect_check_aclc(struct smc_init_info *ini,
 /* perform steps before actually connecting */
 static int __smc_connect(struct smc_sock *smc)
 {
-   u8 version = smc_ism_v2_capable ? SMC_V2 : SMC_V1;
+   u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1;
struct smc_clc_msg_accept_confirm_v2 *aclc2;
struct smc_clc_msg_accept_confirm *aclc;
struct smc_init_info *ini = NULL;
@@ -945,9 +945,9 @@ static int __smc_connect(struct smc_sock *smc)
version);
 
ini->smcd_version = SMC_V1;
-   ini->smcd_version |= smc_ism_v2_capable ? SMC_V2 : 0;
+   ini->smcd_version |= smc_ism_is_v2_capable() ? SMC_V2 : 0;
ini->smc_type_v1 = SMC_TYPE_B;
-   ini->smc_type_v2 = smc_ism_v2_capable ? SMC_TYPE_D : SMC_TYPE_N;
+   ini->smc_type_v2 = smc_ism_is_v2_capable() ? SMC_TYPE_D : SMC_TYPE_N;
 
/* get vlan id from IP device */
if (smc_vlan_by_tcpsk(smc->clcsock, ini)) {
@@ -1354,7 +1354,7 @@ static int smc_listen_v2_check(struct smc_sock *new_smc,
rc = SMC_CLC_DECL_PEERNOSMC;
goto out;
}
-   if (!smc_ism_v2_capable) {
+   if (!smc_ism_is_v2_capable()) {
ini->smcd_version &= ~SMC_V2;
rc = SMC_CLC_DECL_NOISM2SUPP;
goto out;
@@ -1680,7 +1680,7 @@ static void smc_listen_work(struct work_struct *work)
 {
struct smc_sock *new_smc = container_of(work, struct smc_sock,
smc_listen_work);
-   u8 version = smc_ism_v2_capable ? SMC_V2 : SMC_V1;
+   u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1;
struct socket *newclcsock = new_smc->clcsock;
struct smc_clc_msg_accept_confirm *cclc;
struct smc_clc_msg_proposal_area *buf;
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 6abbdd09a580..2456ee8228cd 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -21,7 +21,7 @@ struct smcd_dev_list smcd_dev_list = {
.mutex = __MUTEX_INITIALIZER(smcd_dev_list.mutex)
 };
 
-bool smc_ism_v2_capable;
+static bool smc_ism_v2_capable;
 
 /* Test if an ISM communication is possible - same CPC */
 int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev 
*smcd)
@@ -51,6 +51,12 @@ u16 smc_ism_get_chid(struct smcd_dev *smcd)
return smcd->ops->get_chid(smcd);
 }
 
+/* HW supports ISM V2 and thus System EID is defined */
+bool smc_ism_is_v2_capable(void)
+{
+   return smc_ism_v2_capable;
+}
+
 /* Set a connection using this DMBE. */
 void smc_ism_set_conn(struct smc_connection *conn)
 {
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index 8048e09ddcf8..481a4b7df30b 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -10,6 +10,7 @@
 #define SMCD_ISM_H
 
 #include 
+#include 
 #include 
 
 #include "smc.h"
@@ -20,9 +21,6 @@ struct smcd_dev_list {/* List of SMCD devices */
 };
 
 extern struct smcd_dev_listsmcd_dev_list;  /* list of smcd devices */
-extern boolsmc_ism_v2_capable; /* HW supports ISM V2 and thus
-* System EID is defined
-*/
 
 struct smc_ism_vlanid {/* VLAN id set on ISM device */
struct list_head list;
@@ -52,5 +50,6 @@ int smc_ism_write(struct smcd_dev *dev, const struct 
smc_ism_position *pos,
 int smc_ism_signal_shutdown(struct smc_link_group *lgr);
 void smc_ism_get_system_eid(struct smcd_dev *dev, u8 **eid);
 u16 smc_ism_get_chid(struct smcd_dev *dev);
+bool smc_ism_is_v2_capable(void);
 void smc_ism_init(void);
 #endif
-- 
2.17.1



[PATCH net-next v4 13/15] net/smc: Add support for obtaining SMCR device list

2020-11-09 Thread Karsten Graul
From: Guvenc Gulce 

Deliver SMCR device information via netlink based
diagnostic interface.

Signed-off-by: Guvenc Gulce 
Signed-off-by: Karsten Graul 
---
 include/uapi/linux/smc_diag.h |   6 ++
 net/smc/smc_core.c|   7 ++
 net/smc/smc_core.h|   2 +
 net/smc/smc_diag.c| 133 ++
 4 files changed, 148 insertions(+)

diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h
index ab8f76bdd1a4..4c6332785533 100644
--- a/include/uapi/linux/smc_diag.h
+++ b/include/uapi/linux/smc_diag.h
@@ -88,6 +88,7 @@ enum {
 /* SMC_DIAG_GET_DEV_INFO command extensions */
 enum {
SMC_DIAG_DEV_INFO_SMCD = 1,
+   SMC_DIAG_DEV_INFO_SMCR,
 };
 
 #define SMC_DIAG_MAX (__SMC_DIAG_MAX - 1)
@@ -182,6 +183,11 @@ struct smc_diag_dev_info {
__u16   pci_vendor; /* PCI Vendor */
__u16   pci_device; /* PCI Device Vendor ID */
__u8pci_id[SMC_PCI_ID_STR_LEN]; /* PCI ID */
+   __u8dev_name[IB_DEVICE_NAME_MAX]; /* IB Device name */
+   __u8netdev[SMC_MAX_PORTS][IFNAMSIZ]; /* Netdev name(s) */
+   __u8port_state[SMC_MAX_PORTS]; /* IB Port State */
+   __u8port_valid[SMC_MAX_PORTS]; /* Is IB Port valid */
+   __u32   lnk_cnt_by_port[SMC_MAX_PORTS]; /* # lnks per port */
 };
 
 struct smc_diag_lgr {
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 37cc754485f0..f23f8f1d10d8 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -214,6 +214,11 @@ static void smc_lgr_unregister_conn(struct smc_connection 
*conn)
conn->lgr = NULL;
 }
 
+static struct smc_ib_devices *smc_get_smc_ib_devices(void)
+{
+   return &smc_ib_devices;
+}
+
 static struct smcd_dev_list *smc_get_smcd_dev_list(void)
 {
return &smcd_dev_list;
@@ -228,6 +233,8 @@ static const struct smc_diag_ops smc_diag_ops = {
.get_lgr_list   = smc_get_lgr_list,
.get_smcd_devices   = smc_get_smcd_dev_list,
.get_chid   = smc_ism_get_chid,
+   .get_ib_devices = smc_get_smc_ib_devices,
+   .is_ib_port_active  = smc_ib_port_active,
 };
 
 const struct smc_diag_ops *smc_get_diag_ops(void)
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index eec19a8e394c..6bf89bfe34bd 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -25,6 +25,8 @@ struct smc_diag_ops {
struct smc_lgr_list *(*get_lgr_list)(void);
struct smcd_dev_list *(*get_smcd_devices)(void);
u16 (*get_chid)(struct smcd_dev *smcd);
+   struct smc_ib_devices *(*get_ib_devices)(void);
+   bool (*is_ib_port_active)(struct smc_ib_device *smcibdev, u8 ibport);
 };
 
 struct smc_lgr_list {  /* list of link group definition */
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 6e7798dc57fb..3d5151919326 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -386,6 +386,33 @@ static int smc_diag_handle_lgr(struct smc_link_group *lgr,
return rc;
 }
 
+static bool smcr_diag_is_dev_critical(struct smc_lgr_list *smc_lgr,
+ struct smc_ib_device *smcibdev)
+{
+   struct smc_link_group *lgr;
+   bool rc = false;
+   int i;
+
+   spin_lock_bh(&smc_lgr->lock);
+   list_for_each_entry(lgr, &smc_lgr->list, list) {
+   if (lgr->is_smcd)
+   continue;
+   for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+   if (lgr->lnk[i].state == SMC_LNK_UNUSED ||
+   lgr->lnk[i].smcibdev != smcibdev)
+   continue;
+   if (lgr->type == SMC_LGR_SINGLE ||
+   lgr->type == SMC_LGR_ASYMMETRIC_LOCAL) {
+   rc = true;
+   goto out;
+   }
+   }
+   }
+out:
+   spin_unlock_bh(&smc_lgr->lock);
+   return rc;
+}
+
 static int smc_diag_fill_lgr_list(struct smc_lgr_list *smc_lgr,
  struct sk_buff *skb,
  struct netlink_callback *cb,
@@ -541,6 +568,109 @@ static int smc_diag_prep_smcd_dev(struct smcd_dev_list 
*dev_list,
return rc;
 }
 
+static void smc_diag_handle_dev_port(struct smc_diag_dev_info *smc_diag_dev,
+struct ib_device *ibdev,
+struct smc_ib_device *smcibdev,
+int port)
+{
+   unsigned char port_state;
+
+   smc_diag_dev->port_valid[port] = 1;
+   snprintf((char *)&smc_diag_dev->netdev[port],
+sizeof(smc_diag_dev->netdev[port]),
+"%s", (char *)&smcibdev->netdev[port]);
+   snprintf((char *)&smc_diag_dev->pnet_id[port],
+sizeof(smc_diag_dev->pnet_id[port]), "%s",
+(char *)&smcibdev->pnetid[port]

[PATCH net-next v4 15/15] net/smc: Add support for obtaining system information

2020-11-09 Thread Karsten Graul
From: Guvenc Gulce 

Add new netlink command to obtain system information
of the smc module.

Signed-off-by: Guvenc Gulce 
Signed-off-by: Karsten Graul 
---
 include/uapi/linux/smc.h  |  1 +
 include/uapi/linux/smc_diag.h | 17 ++
 net/smc/smc_clc.c |  5 +++
 net/smc/smc_clc.h |  1 +
 net/smc/smc_core.c|  3 ++
 net/smc/smc_core.h|  3 ++
 net/smc/smc_diag.c| 62 +++
 7 files changed, 92 insertions(+)

diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h
index 736e8b98c8a5..04385a98037a 100644
--- a/include/uapi/linux/smc.h
+++ b/include/uapi/linux/smc.h
@@ -38,6 +38,7 @@ enum {/* SMC PNET Table 
commands */
 #define SMC_LGR_ID_SIZE4
 #define SMC_MAX_HOSTNAME_LEN   32 /* Max length of hostname */
 #define SMC_MAX_EID_LEN32 /* Max length of eid */
+#define SMC_MAX_EID8 /* Max number of eids */
 #define SMC_MAX_PORTS  2 /* Max # of ports per ib device */
 #define SMC_PCI_ID_STR_LEN 16 /* Max length of pci id string */
 #endif /* _UAPI_LINUX_SMC_H */
diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h
index 4c6332785533..d63b08c0b7e8 100644
--- a/include/uapi/linux/smc_diag.h
+++ b/include/uapi/linux/smc_diag.h
@@ -75,6 +75,7 @@ enum {
 enum {
SMC_DIAG_GET_LGR_INFO = SMC_DIAG_EXTS_PER_CMD,
SMC_DIAG_GET_DEV_INFO,
+   SMC_DIAG_GET_SYS_INFO,
__SMC_DIAG_EXT_MAX,
 };
 
@@ -91,6 +92,11 @@ enum {
SMC_DIAG_DEV_INFO_SMCR,
 };
 
+/* SMC_DIAG_GET_SYS_INFO command extensions */
+enum {
+   SMC_DIAG_SYS_INFO = 1,
+};
+
 #define SMC_DIAG_MAX (__SMC_DIAG_MAX - 1)
 #define SMC_DIAG_EXT_MAX (__SMC_DIAG_EXT_MAX - 1)
 
@@ -131,6 +137,17 @@ struct smc_diag_v2_lgr_info {
__u8peer_hostname[SMC_MAX_HOSTNAME_LEN]; /* Peer host */
 };
 
+struct smc_system_info {
+   __u8smc_version;/* SMC Version */
+   __u8smc_release;/* SMC Release */
+   __u8ueid_count; /* Number of UEIDs */
+   __u8smc_ism_is_v2;  /* Is ISM SMC v2 capable */
+   __u32   reserved;   /* Reserved for future use */
+   __u8local_hostname[SMC_MAX_HOSTNAME_LEN]; /* Hostnames */
+   __u8seid[SMC_MAX_EID_LEN];  /* System EID */
+   __u8ueid[SMC_MAX_EID][SMC_MAX_EID_LEN]; /* User EIDs */
+};
+
 /* SMC_DIAG_LINKINFO */
 
 struct smc_diag_linkinfo {
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 696d89c2dce4..e286dafd6e88 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -772,6 +772,11 @@ int smc_clc_send_accept(struct smc_sock *new_smc, bool 
srv_first_contact,
return len > 0 ? 0 : len;
 }
 
+void smc_clc_get_hostname(u8 **host)
+{
+   *host = &smc_hostname[0];
+}
+
 void __init smc_clc_init(void)
 {
struct new_utsname *u;
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 49752c997c51..32d37f7b70f2 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -334,5 +334,6 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool 
clnt_first_contact,
 int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact,
u8 version);
 void smc_clc_init(void) __init;
+void smc_clc_get_hostname(u8 **host);
 
 #endif
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index f23f8f1d10d8..b79daa3cf0b0 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -235,6 +235,9 @@ static const struct smc_diag_ops smc_diag_ops = {
.get_chid   = smc_ism_get_chid,
.get_ib_devices = smc_get_smc_ib_devices,
.is_ib_port_active  = smc_ib_port_active,
+   .get_system_eid = smc_ism_get_system_eid,
+   .get_hostname   = smc_clc_get_hostname,
+   .is_v2_capable  = smc_ism_is_v2_capable,
 };
 
 const struct smc_diag_ops *smc_get_diag_ops(void)
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 6bf89bfe34bd..3536fa3e45af 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -27,6 +27,9 @@ struct smc_diag_ops {
u16 (*get_chid)(struct smcd_dev *smcd);
struct smc_ib_devices *(*get_ib_devices)(void);
bool (*is_ib_port_active)(struct smc_ib_device *smcibdev, u8 ibport);
+   void (*get_system_eid)(struct smcd_dev *smcd, u8 **eid);
+   void (*get_hostname)(u8 **host);
+   bool (*is_v2_capable)(void);
 };
 
 struct smc_lgr_list {  /* list of link group definition */
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 3d5151919326..baa6c66aa320 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -671,6 +671,64 @@ static int smc_diag_prep_smcr_dev(struct smc_ib_devices 
*dev_list,
return rc;
 }
 
+static int smc_diag_prep_sys_info(struct smcd_dev_list *dev_list,
+

[PATCH net-next v4 06/15] net/smc: Add diagnostic information to link structure

2020-11-09 Thread Karsten Graul
From: Guvenc Gulce 

During link creation add network and ib-device name to
link structure. This is needed for diagnostic purposes.

When diagnostic information is gathered, we need to traverse the
device, linkgroup and link structures. To do that, we must hold a
spinlock for the linkgroup list. Without this diagnostic information
in the link structure, we would have to take another device list
mutex in order to dereference the device pointer in the link
structure, which is impossible while already holding a spinlock.

Signed-off-by: Guvenc Gulce 
Signed-off-by: Karsten Graul 
---
 net/smc/smc_core.c | 11 +++
 net/smc/smc_core.h |  3 +++
 2 files changed, 14 insertions(+)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 24d55b5b352b..ca8b1644ba85 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -313,6 +313,16 @@ static u8 smcr_next_link_id(struct smc_link_group *lgr)
return link_id;
 }
 
+static void smcr_copy_dev_info_to_link(struct smc_link *link)
+{
+   struct smc_ib_device *smcibdev = link->smcibdev;
+
+   snprintf(link->ibname, sizeof(link->ibname), "%s",
+smcibdev->ibdev->name);
+   snprintf(link->ndevname, sizeof(link->ndevname), "%s",
+smcibdev->netdev[link->ibport - 1]);
+}
+
 int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
   u8 link_idx, struct smc_init_info *ini)
 {
@@ -327,6 +337,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct 
smc_link *lnk,
lnk->smcibdev = ini->ib_dev;
lnk->ibport = ini->ib_port;
smc_ibdev_cnt_inc(lnk);
+   smcr_copy_dev_info_to_link(lnk);
lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
atomic_set(&lnk->conn_cnt, 0);
smc_llc_link_set_uid(lnk);
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 83a88a4635db..ee073a191d40 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -124,6 +124,9 @@ struct smc_link {
u8  link_is_asym;   /* is link asymmetric? */
struct smc_link_group   *lgr;   /* parent link group */
struct work_struct  link_down_wrk;  /* wrk to bring link down */
+   /* Diagnostic relevant link information */
+   charibname[IB_DEVICE_NAME_MAX];/* ib device name */
+   charndevname[IFNAMSIZ];/* network device name */
 
enum smc_link_state state;  /* state of link */
struct delayed_work llc_testlink_wrk; /* testlink worker */
-- 
2.17.1



[PATCH net-next v4 08/15] net/smc: Add ability to work with extended SMC netlink API

2020-11-09 Thread Karsten Graul
From: Guvenc Gulce 

The smc_diag module should be able to work with both the legacy and
the extended netlink API. This is done by using the sequence field
of the netlink message header. The sequence field is optional and is
filled with the constant value MAGIC_SEQ in the current
implementation.
New constant values MAGIC_SEQ_V2 and MAGIC_SEQ_V2_ACK are used to
signal the usage of the new Netlink API between userspace and
kernel.

Signed-off-by: Guvenc Gulce 
Signed-off-by: Karsten Graul 
---
 include/uapi/linux/smc_diag.h |  7 +++
 net/smc/smc_diag.c| 21 +
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h
index 8cb3a6fef553..236c1c52d562 100644
--- a/include/uapi/linux/smc_diag.h
+++ b/include/uapi/linux/smc_diag.h
@@ -6,6 +6,13 @@
 #include 
 #include 
 
+/* Sequence numbers */
+enum {
+   MAGIC_SEQ = 123456,
+   MAGIC_SEQ_V2,
+   MAGIC_SEQ_V2_ACK,
+};
+
 /* Request structure */
 struct smc_diag_req {
__u8diag_family;
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 44be723c97fe..bc2b616524ff 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -293,19 +293,24 @@ static int smc_diag_dump(struct sk_buff *skb, struct 
netlink_callback *cb)
return skb->len;
 }
 
+static int smc_diag_dump_ext(struct sk_buff *skb, struct netlink_callback *cb)
+{
+   return skb->len;
+}
+
 static int smc_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
 {
struct net *net = sock_net(skb->sk);
-
+   struct netlink_dump_control c = {
+   .min_dump_alloc = SKB_WITH_OVERHEAD(32768),
+   };
if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY &&
h->nlmsg_flags & NLM_F_DUMP) {
-   {
-   struct netlink_dump_control c = {
-   .dump = smc_diag_dump,
-   .min_dump_alloc = SKB_WITH_OVERHEAD(32768),
-   };
-   return netlink_dump_start(net->diag_nlsk, skb, h, &c);
-   }
+   if (h->nlmsg_seq >= MAGIC_SEQ_V2)
+   c.dump = smc_diag_dump_ext;
+   else
+   c.dump = smc_diag_dump;
+   return netlink_dump_start(net->diag_nlsk, skb, h, &c);
}
return 0;
 }
-- 
2.17.1



[PATCH net-next v4 05/15] net/smc: Add diagnostic information to smc ib-device

2020-11-09 Thread Karsten Graul
From: Guvenc Gulce 

During smc ib-device creation, add network device name to smc
ib-device structure. Register for netdevice name changes and
update ib-device accordingly. This is needed for diagnostic purposes.

Signed-off-by: Guvenc Gulce 
Signed-off-by: Karsten Graul 
---
 net/smc/smc_ib.c   | 45 +
 net/smc/smc_ib.h   |  2 ++
 net/smc/smc_pnet.c |  3 +++
 3 files changed, 50 insertions(+)

diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 1c314dbdc7fa..300cca9296be 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -557,6 +557,50 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device 
*smcibdev)
 
 static struct ib_client smc_ib_client;
 
+static void smc_copy_netdev_name(struct smc_ib_device *smcibdev, int port)
+{
+   struct ib_device *ibdev = smcibdev->ibdev;
+   struct net_device *ndev;
+
+   if (!ibdev->ops.get_netdev)
+   return;
+   ndev = ibdev->ops.get_netdev(ibdev, port + 1);
+   if (ndev) {
+   snprintf(smcibdev->netdev[port],
+sizeof(smcibdev->netdev[port]),
+"%s", ndev->name);
+   dev_put(ndev);
+   }
+}
+
+void smc_ib_ndev_name_change(struct net_device *ndev)
+{
+   struct smc_ib_device *smcibdev;
+   struct ib_device *libdev;
+   struct net_device *lndev;
+   u8 port_cnt;
+   int i;
+
+   mutex_lock(&smc_ib_devices.mutex);
+   list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
+   port_cnt = smcibdev->ibdev->phys_port_cnt;
+   for (i = 0; i < min_t(size_t, port_cnt, SMC_MAX_PORTS); i++) {
+   libdev = smcibdev->ibdev;
+   if (!libdev->ops.get_netdev)
+   continue;
+   lndev = libdev->ops.get_netdev(libdev, i + 1);
+   if (lndev)
+   dev_put(lndev);
+   if (lndev != ndev)
+   continue;
+   snprintf(smcibdev->netdev[i],
+sizeof(smcibdev->netdev[i]),
+"%s", ndev->name);
+   }
+   }
+   mutex_unlock(&smc_ib_devices.mutex);
+}
+
 /* callback function for ib_register_client() */
 static int smc_ib_add_dev(struct ib_device *ibdev)
 {
@@ -596,6 +640,7 @@ static int smc_ib_add_dev(struct ib_device *ibdev)
if (smc_pnetid_by_dev_port(ibdev->dev.parent, i,
   smcibdev->pnetid[i]))
smc_pnetid_by_table_ib(smcibdev, i + 1);
+   smc_copy_netdev_name(smcibdev, i);
pr_warn_ratelimited("smc:ib device %s port %d has pnetid "
"%.16s%s\n",
smcibdev->ibdev->name, i + 1,
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 3b85360a473b..5319496adea0 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -55,11 +55,13 @@ struct smc_ib_device {  /* 
ib-device infos for smc */
struct mutexmutex;  /* protect dev setup+cleanup */
atomic_tlnk_cnt_by_port[SMC_MAX_PORTS];
/* number of links per port */
+   charnetdev[SMC_MAX_PORTS][IFNAMSIZ];/* ndev names */
 };
 
 struct smc_buf_desc;
 struct smc_link;
 
+void smc_ib_ndev_name_change(struct net_device *ndev);
 int smc_ib_register_client(void) __init;
 void smc_ib_unregister_client(void);
 bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport);
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index f3c18b991d35..b0f40d73afd6 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -828,6 +828,9 @@ static int smc_pnet_netdev_event(struct notifier_block 
*this,
case NETDEV_UNREGISTER:
smc_pnet_remove_by_ndev(event_dev);
return NOTIFY_OK;
+   case NETDEV_CHANGENAME:
+   smc_ib_ndev_name_change(event_dev);
+   return NOTIFY_OK;
case NETDEV_REGISTER:
smc_pnet_add_by_ndev(event_dev);
return NOTIFY_OK;
-- 
2.17.1



[PATCH net-next v4 12/15] net/smc: Add support for obtaining SMCD device list

2020-11-09 Thread Karsten Graul
From: Guvenc Gulce 

Deliver SMCD device information via netlink based
diagnostic interface.

Signed-off-by: Guvenc Gulce 
Signed-off-by: Karsten Graul 
---
 include/uapi/linux/smc.h  |  2 +
 include/uapi/linux/smc_diag.h | 20 
 net/smc/smc_core.h|  8 +++
 net/smc/smc_diag.c| 95 +++
 net/smc/smc_ib.h  |  1 -
 5 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h
index 635e2c2aeac5..736e8b98c8a5 100644
--- a/include/uapi/linux/smc.h
+++ b/include/uapi/linux/smc.h
@@ -38,4 +38,6 @@ enum {/* SMC PNET Table 
commands */
 #define SMC_LGR_ID_SIZE4
 #define SMC_MAX_HOSTNAME_LEN   32 /* Max length of hostname */
 #define SMC_MAX_EID_LEN32 /* Max length of eid */
+#define SMC_MAX_PORTS  2 /* Max # of ports per ib device */
+#define SMC_PCI_ID_STR_LEN 16 /* Max length of pci id string */
 #endif /* _UAPI_LINUX_SMC_H */
diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h
index 5a80172df757..ab8f76bdd1a4 100644
--- a/include/uapi/linux/smc_diag.h
+++ b/include/uapi/linux/smc_diag.h
@@ -74,6 +74,7 @@ enum {
 /* V2 Commands */
 enum {
SMC_DIAG_GET_LGR_INFO = SMC_DIAG_EXTS_PER_CMD,
+   SMC_DIAG_GET_DEV_INFO,
__SMC_DIAG_EXT_MAX,
 };
 
@@ -84,6 +85,11 @@ enum {
SMC_DIAG_LGR_INFO_SMCD,
 };
 
+/* SMC_DIAG_GET_DEV_INFO command extensions */
+enum {
+   SMC_DIAG_DEV_INFO_SMCD = 1,
+};
+
 #define SMC_DIAG_MAX (__SMC_DIAG_MAX - 1)
 #define SMC_DIAG_EXT_MAX (__SMC_DIAG_EXT_MAX - 1)
 
@@ -164,6 +170,20 @@ struct smcd_diag_dmbinfo { /* SMC-D Socket 
internals */
struct smc_diag_v2_lgr_info v2_lgr_info; /* SMCv2 info */
 };
 
+struct smc_diag_dev_info {
+   /* Pnet ID per device port */
+   __u8pnet_id[SMC_MAX_PORTS][SMC_MAX_PNETID_LEN];
+   /* whether pnetid is set by user */
+   __u8pnetid_by_user[SMC_MAX_PORTS];
+   __u32   use_cnt;/* Number of linkgroups */
+   __u8is_critical;/* Is device critical */
+   __u32   pci_fid;/* PCI FID */
+   __u16   pci_pchid;  /* PCI CHID */
+   __u16   pci_vendor; /* PCI Vendor */
+   __u16   pci_device; /* PCI Device Vendor ID */
+   __u8pci_id[SMC_PCI_ID_STR_LEN]; /* PCI ID */
+};
+
 struct smc_diag_lgr {
__u8lgr_id[SMC_LGR_ID_SIZE]; /* Linkgroup identifier */
__u8lgr_role;   /* Linkgroup role */
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index fb1f63f5e681..eec19a8e394c 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -373,6 +373,14 @@ static inline bool smc_link_active(struct smc_link *lnk)
return lnk->state == SMC_LNK_ACTIVE;
 }
 
+struct smc_pci_dev {
+   __u32   pci_fid;
+   __u16   pci_pchid;
+   __u16   pci_vendor;
+   __u16   pci_device;
+   __u8pci_id[SMC_PCI_ID_STR_LEN];
+};
+
 struct smc_sock;
 struct smc_clc_msg_accept_confirm;
 struct smc_clc_msg_local;
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index a644e2299dbc..6e7798dc57fb 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -35,6 +36,24 @@ static struct smc_diag_dump_ctx *smc_dump_context(struct 
netlink_callback *cb)
return (struct smc_diag_dump_ctx *)cb->ctx;
 }
 
+static void smc_set_pci_values(struct pci_dev *pci_dev,
+  struct smc_pci_dev *smc_dev)
+{
+   smc_dev->pci_vendor = pci_dev->vendor;
+   smc_dev->pci_device = pci_dev->device;
+   snprintf(smc_dev->pci_id, sizeof(smc_dev->pci_id), "%s",
+pci_name(pci_dev));
+#if IS_ENABLED(CONFIG_S390)
+   { /* Set s390 specific PCI information */
+   struct zpci_dev *zdev;
+
+   zdev = to_zpci(pci_dev);
+   smc_dev->pci_fid = zdev->fid;
+   smc_dev->pci_pchid = zdev->pchid;
+   }
+#endif
+}
+
 static void smc_gid_be16_convert(__u8 *buf, u8 *gid_raw)
 {
sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x",
@@ -450,6 +469,78 @@ static int smc_diag_fill_smcd_dev(struct smcd_dev_list 
*dev_list,
return rc;
 }
 
+static int smc_diag_handle_smcd_dev(struct smcd_dev *smcd,
+   struct sk_buff *skb,
+   struct netlink_callback *cb,
+   struct smc_diag_req_v2 *req)
+{
+   struct smc_diag_dev_info smc_diag_dev;
+   struct smc_pci_dev smc_pci_dev;
+   struct nlmsghdr *nlh;
+   int dummy = 0;
+   int rc = 0;
+
+   nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, MAGIC_SEQ_V2_ACK,
+ 

[PATCH net-next v4 03/15] net/smc: Add connection counters for links

2020-11-09 Thread Karsten Graul
From: Guvenc Gulce 

Add connection counters to the structure of the link.
Increase/decrease the counters as needed in the corresponding
routines.

Signed-off-by: Guvenc Gulce 
Signed-off-by: Karsten Graul 
---
 net/smc/smc_core.c | 16 ++--
 net/smc/smc_core.h |  1 +
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 2b19863f7171..323a4b396be0 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -139,6 +139,7 @@ static int smcr_lgr_conn_assign_link(struct smc_connection 
*conn, bool first)
}
if (!conn->lnk)
return SMC_CLC_DECL_NOACTLINK;
+   atomic_inc(&conn->lnk->conn_cnt);
return 0;
 }
 
@@ -180,6 +181,8 @@ static void __smc_lgr_unregister_conn(struct smc_connection 
*conn)
struct smc_link_group *lgr = conn->lgr;
 
rb_erase(&conn->alert_node, &lgr->conns_all);
+   if (conn->lnk)
+   atomic_dec(&conn->lnk->conn_cnt);
lgr->conns_num--;
conn->alert_token_local = 0;
sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
@@ -314,6 +317,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct 
smc_link *lnk,
lnk->smcibdev = ini->ib_dev;
lnk->ibport = ini->ib_port;
lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
+   atomic_set(&lnk->conn_cnt, 0);
smc_llc_link_set_uid(lnk);
INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
if (!ini->ib_dev->initialized) {
@@ -526,6 +530,14 @@ static int smc_switch_cursor(struct smc_sock *smc, struct 
smc_cdc_tx_pend *pend,
return rc;
 }
 
+static void smc_switch_link_and_count(struct smc_connection *conn,
+ struct smc_link *to_lnk)
+{
+   atomic_dec(&conn->lnk->conn_cnt);
+   conn->lnk = to_lnk;
+   atomic_inc(&conn->lnk->conn_cnt);
+}
+
 struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
  struct smc_link *from_lnk, bool is_dev_err)
 {
@@ -574,7 +586,7 @@ struct smc_link *smc_switch_conns(struct smc_link_group 
*lgr,
smc->sk.sk_state == SMC_PEERABORTWAIT ||
smc->sk.sk_state == SMC_PROCESSABORT) {
spin_lock_bh(&conn->send_lock);
-   conn->lnk = to_lnk;
+   smc_switch_link_and_count(conn, to_lnk);
spin_unlock_bh(&conn->send_lock);
continue;
}
@@ -588,7 +600,7 @@ struct smc_link *smc_switch_conns(struct smc_link_group 
*lgr,
}
/* avoid race with smcr_tx_sndbuf_nonempty() */
spin_lock_bh(&conn->send_lock);
-   conn->lnk = to_lnk;
+   smc_switch_link_and_count(conn, to_lnk);
rc = smc_switch_cursor(smc, pend, wr_buf);
spin_unlock_bh(&conn->send_lock);
sock_put(&smc->sk);
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 9aee54a6bcba..83a88a4635db 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -129,6 +129,7 @@ struct smc_link {
struct delayed_work llc_testlink_wrk; /* testlink worker */
struct completion   llc_testlink_resp; /* wait for rx of testlink */
int llc_testlink_time; /* testlink interval */
+   atomic_tconn_cnt;
 };
 
 /* For now we just allow one parallel link per link group. The SMC protocol
-- 
2.17.1



Re: [PATCH v2 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter

2020-11-09 Thread Rafael J. Wysocki
On Mon, Nov 9, 2020 at 4:00 PM Zhang Qilong  wrote:
>
> In many cases, we need to check the return value of pm_runtime_get_sync,
> but that complicates the handling of the usage counter. Many callers
> forget to decrease the usage counter when it fails. This has been
> discussed a lot[0][1]. So we add a function that deals with the usage
> counter, for better coding.
>
> [0]https://lkml.org/lkml/2020/6/14/88
> [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/20200520095148.10995-1-dinghao@zju.edu.cn/
> Signed-off-by: Zhang Qilong 
> ---
>  include/linux/pm_runtime.h | 30 ++
>  1 file changed, 30 insertions(+)
>
> diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
> index 4b708f4e8eed..6549ce764400 100644
> --- a/include/linux/pm_runtime.h
> +++ b/include/linux/pm_runtime.h
> @@ -386,6 +386,36 @@ static inline int pm_runtime_get_sync(struct device *dev)
> return __pm_runtime_resume(dev, RPM_GET_PUT);
>  }
>
> +/**
> + * pm_runtime_general_get - Bump up usage counter of a device and resume it.
> + * @dev: Target device.
> + *
> + * Increase runtime PM usage counter of @dev first, and carry out runtime-resume
> + * of it synchronously. If __pm_runtime_resume returns a negative value (device is in
> + * error state), we need to decrease the usage counter before it returns. If
> + * __pm_runtime_resume returns a positive value, it means the runtime of device has
> + * already been in active state, and we let the new wrapper return zero instead.
> + *
> + * The possible return values of this function are zero or a negative value.
> + * zero:
> + *- it means resume succeeded or runtime of device has already been active, the
> + *  runtime PM usage counter of @dev remains incremented.
> + * negative:
> + *- it means failure and the runtime PM usage counter of @dev has been balanced.

The kerneldoc above is kind of noisy and it is hard to figure out what
the helper really does from it.

You could basically say something like "Resume @dev synchronously and
if that is successful, increment its runtime PM usage counter.  Return
0 if the runtime PM usage counter of @dev has been incremented or a
negative error code otherwise."

> + */
> +static inline int pm_runtime_general_get(struct device *dev)

What about pm_runtime_resume_and_get()?

> +{
> +   int ret = 0;

This extra initialization is not necessary.

You can initialize ret to the __pm_runtime_resume() return value right away.

> +
> +   ret = __pm_runtime_resume(dev, RPM_GET_PUT);
> +   if (ret < 0) {
> +   pm_runtime_put_noidle(dev);
> +   return ret;
> +   }
> +
> +   return 0;
> +}
> +
>  /**
>   * pm_runtime_put - Drop device usage counter and queue up "idle check" if 0.
>   * @dev: Target device.
> --


Re: [PATCH net-next 05/18] rtnetlink: Add RTNH_F_TRAP flag

2020-11-09 Thread Ido Schimmel
On Fri, Nov 06, 2020 at 11:12:21AM -0800, Jakub Kicinski wrote:
> On Wed,  4 Nov 2020 15:30:27 +0200 Ido Schimmel wrote:
> > *flags |= (nhc->nhc_flags & RTNH_F_ONLINK);
> > if (nhc->nhc_flags & RTNH_F_OFFLOAD)
> > *flags |= RTNH_F_OFFLOAD;
> > +   if (nhc->nhc_flags & RTNH_F_TRAP)
> > +   *flags |= RTNH_F_TRAP;
> 
> Out of curiosity - why use this if construct like OFFLOAD rather than
> the more concise mask like ONLINK does?

Good question :)

> In fact looks like the mask could just be extended there instead?

Yes, good suggestion. Will do that.


[PATCH v5 7/8] can-dev: introduce helpers to access Classical CAN DLC values

2020-11-09 Thread Oliver Hartkopp
can_get_len8_dlc: get value to fill len8_dlc at frame reception time
can_get_cc_dlc: get DLC value to be written into CAN controller

Signed-off-by: Oliver Hartkopp 
---
 include/linux/can/dev.h | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index e767a96ae075..f25558609d09 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -168,10 +168,29 @@ static inline bool can_is_canfd_skb(const struct sk_buff 
*skb)
 {
/* the CAN specific type of skb is identified by its data length */
return skb->len == CANFD_MTU;
 }
 
+/* helper to handle len8_dlc value for Classical CAN raw DLC access */
+static inline u8 can_check_len8_dlc(u32 ctrlmode, u8 len, u8 dlc, u8 ret)
+{
+   /* return value for len8_dlc only if all conditions apply */
+   if ((ctrlmode & CAN_CTRLMODE_CC_LEN8_DLC) &&
+   (len == CAN_MAX_DLEN) &&
+   (dlc > CAN_MAX_DLEN && dlc <= CAN_MAX_RAW_DLC))
+   ret = dlc;
+
+   /* no valid len8_dlc value -> return provided default value */
+   return ret;
+}
+
+/* get value to fill len8_dlc in struct can_frame at frame reception time */
+#define can_get_len8_dlc(cm, len, dlc) can_check_len8_dlc(cm, len, dlc, 0)
+
+/* get DLC value to be written into Classical CAN controller at tx time */
+#define can_get_cc_dlc(cm, len, dlc) can_check_len8_dlc(cm, len, dlc, len)
+
 /* helper to define static CAN controller features at device creation time */
 static inline void can_set_static_ctrlmode(struct net_device *dev,
   u32 static_mode)
 {
struct can_priv *priv = netdev_priv(dev);
-- 
2.28.0



[PATCH v5 5/8] can: rename CAN FD related can_len2dlc and can_dlc2len helpers

2020-11-09 Thread Oliver Hartkopp
The helper functions can_len2dlc and can_dlc2len are only relevant for
CAN FD data length code (DLC) conversion.

To fit the introduced can_cc_dlc2len for Classical CAN we rename:

can_dlc2len -> can_fd_dlc2len to get the payload length from the DLC
can_len2dlc -> can_fd_len2dlc to get the DLC from the payload length

Suggested-by: Vincent Mailhol 
Signed-off-by: Oliver Hartkopp 
---
 Documentation/networking/can.rst  | 2 +-
 drivers/net/can/dev.c | 8 
 drivers/net/can/flexcan.c | 4 ++--
 drivers/net/can/ifi_canfd/ifi_canfd.c | 4 ++--
 drivers/net/can/kvaser_pciefd.c   | 6 +++---
 drivers/net/can/m_can/m_can.c | 6 +++---
 drivers/net/can/peak_canfd/peak_canfd.c   | 4 ++--
 drivers/net/can/rcar/rcar_canfd.c | 4 ++--
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c| 8 
 drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 6 +++---
 drivers/net/can/usb/peak_usb/pcan_usb_fd.c| 4 ++--
 drivers/net/can/xilinx_can.c  | 4 ++--
 include/linux/can/dev.h   | 4 ++--
 13 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst
index ff05cbd05e0d..4895b0dd2714 100644
--- a/Documentation/networking/can.rst
+++ b/Documentation/networking/can.rst
@@ -1330,11 +1330,11 @@ payload. The representation of this length in 
can_frame.can_dlc and
 canfd_frame.len for userspace applications and inside the Linux network
 layer is a plain value from 0 .. 64 instead of the CAN 'data length code'.
 The data length code was a 1:1 mapping to the payload length in the legacy
 CAN frames anyway. The payload length to the bus-relevant DLC mapping is
 only performed inside the CAN drivers, preferably with the helper
-functions can_dlc2len() and can_len2dlc().
+functions can_fd_dlc2len() and can_fd_len2dlc().
 
 The CAN netdevice driver capabilities can be distinguished by the network
 devices maximum transfer unit (MTU)::
 
   MTU = 16 (CAN_MTU)   => sizeof(struct can_frame)   => 'legacy' CAN device
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 566501a02b91..7878544184b9 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -29,15 +29,15 @@ MODULE_AUTHOR("Wolfgang Grandegger ");
 
 static const u8 dlc2len[] = {0, 1, 2, 3, 4, 5, 6, 7,
 8, 12, 16, 20, 24, 32, 48, 64};
 
 /* get data length from raw data length code (DLC) */
-u8 can_dlc2len(u8 dlc)
+u8 can_fd_dlc2len(u8 dlc)
 {
return dlc2len[dlc & 0x0F];
 }
-EXPORT_SYMBOL_GPL(can_dlc2len);
+EXPORT_SYMBOL_GPL(can_fd_dlc2len);
 
 static const u8 len2dlc[] = {0, 1, 2, 3, 4, 5, 6, 7, 8,/* 0 - 
8 */
 9, 9, 9, 9,/* 9 - 12 */
 10, 10, 10, 10,/* 13 - 16 */
 11, 11, 11, 11,/* 17 - 20 */
@@ -47,18 +47,18 @@ static const u8 len2dlc[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 
/* 0 - 8 */
 14, 14, 14, 14, 14, 14, 14, 14,/* 41 - 48 */
 15, 15, 15, 15, 15, 15, 15, 15,/* 49 - 56 */
 15, 15, 15, 15, 15, 15, 15, 15};   /* 57 - 64 */
 
 /* map the sanitized data length to an appropriate data length code */
-u8 can_len2dlc(u8 len)
+u8 can_fd_len2dlc(u8 len)
 {
if (unlikely(len > 64))
return 0xF;
 
return len2dlc[len];
 }
-EXPORT_SYMBOL_GPL(can_len2dlc);
+EXPORT_SYMBOL_GPL(can_fd_len2dlc);
 
 #ifdef CONFIG_CAN_CALC_BITTIMING
 #define CAN_CALC_MAX_ERROR 50 /* in one-tenth of a percent */
 
 /* Bit-timing calculation derived from:
diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index e76fb1500fa1..5542290d29f5 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -742,11 +742,11 @@ static netdev_tx_t flexcan_start_xmit(struct sk_buff 
*skb, struct net_device *de
 {
const struct flexcan_priv *priv = netdev_priv(dev);
struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
u32 can_id;
u32 data;
-   u32 ctrl = FLEXCAN_MB_CODE_TX_DATA | ((can_len2dlc(cfd->len)) << 16);
+   u32 ctrl = FLEXCAN_MB_CODE_TX_DATA | ((can_fd_len2dlc(cfd->len)) << 16);
int i;
 
if (can_dropped_invalid_skb(dev, skb))
return NETDEV_TX_OK;
 
@@ -996,11 +996,11 @@ static struct sk_buff *flexcan_mailbox_read(struct 
can_rx_offload *offload,
cfd->can_id = ((reg_id >> 0) & CAN_EFF_MASK) | CAN_EFF_FLAG;
else
cfd->can_id = (reg_id >> 18) & CAN_SFF_MASK;
 
if (reg_ctrl & FLEXCAN_MB_CNT_EDL) {
-   cfd->len = can_dlc2len((reg_ctrl >> 16) & 0xf);
+   cfd->len = can_fd_dlc2len((reg_ctrl >> 16) & 0xf);
 
if (reg_ctrl & FLEXCAN_MB_CNT_

[PATCH v5 3/8] can: remove obsolete get_canfd_dlc() macro

2020-11-09 Thread Oliver Hartkopp
The macro was always used together with can_dlc2len() which sanitizes the
given dlc value on its own.

Signed-off-by: Oliver Hartkopp 
---
 drivers/net/can/flexcan.c | 2 +-
 drivers/net/can/peak_canfd/peak_canfd.c   | 2 +-
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c| 2 +-
 drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 2 +-
 drivers/net/can/usb/peak_usb/pcan_usb_fd.c| 2 +-
 include/linux/can/dev.h   | 1 -
 include/linux/can/dev/peak_canfd.h| 2 +-
 7 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index d77276cd1d99..e76fb1500fa1 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -996,11 +996,11 @@ static struct sk_buff *flexcan_mailbox_read(struct 
can_rx_offload *offload,
cfd->can_id = ((reg_id >> 0) & CAN_EFF_MASK) | CAN_EFF_FLAG;
else
cfd->can_id = (reg_id >> 18) & CAN_SFF_MASK;
 
if (reg_ctrl & FLEXCAN_MB_CNT_EDL) {
-   cfd->len = can_dlc2len(get_canfd_dlc((reg_ctrl >> 16) & 0xf));
+   cfd->len = can_dlc2len((reg_ctrl >> 16) & 0xf);
 
if (reg_ctrl & FLEXCAN_MB_CNT_BRS)
cfd->flags |= CANFD_BRS;
} else {
cfd->len = can_cc_dlc2len((reg_ctrl >> 16) & 0xf);
diff --git a/drivers/net/can/peak_canfd/peak_canfd.c 
b/drivers/net/can/peak_canfd/peak_canfd.c
index 9ea2adea3f0f..c6077e07214e 100644
--- a/drivers/net/can/peak_canfd/peak_canfd.c
+++ b/drivers/net/can/peak_canfd/peak_canfd.c
@@ -255,11 +255,11 @@ static int pucan_handle_can_rx(struct peak_canfd_priv 
*priv,
struct sk_buff *skb;
const u16 rx_msg_flags = le16_to_cpu(msg->flags);
u8 cf_len;
 
if (rx_msg_flags & PUCAN_MSG_EXT_DATA_LEN)
-   cf_len = can_dlc2len(get_canfd_dlc(pucan_msg_get_dlc(msg)));
+   cf_len = can_dlc2len(pucan_msg_get_dlc(msg));
else
cf_len = can_cc_dlc2len(pucan_msg_get_dlc(msg));
 
/* if this frame is an echo, */
if (rx_msg_flags & PUCAN_MSG_LOOPED_BACK) {
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c 
b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index c0a08400f444..3bac7274ee5b 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -1403,11 +1403,11 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv 
*priv,
 
if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_BRS)
cfd->flags |= CANFD_BRS;
 
dlc = FIELD_GET(MCP251XFD_OBJ_FLAGS_DLC, hw_rx_obj->flags);
-   cfd->len = can_dlc2len(get_canfd_dlc(dlc));
+   cfd->len = can_dlc2len(dlc);
} else {
if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR)
cfd->can_id |= CAN_RTR_FLAG;
 
cfd->len = can_cc_dlc2len(FIELD_GET(MCP251XFD_OBJ_FLAGS_DLC,
diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c 
b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
index 399e9698ffeb..906a3a340131 100644
--- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
+++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
@@ -1249,11 +1249,11 @@ static void kvaser_usb_hydra_rx_msg_ext(const struct 
kvaser_usb *dev,
 
if (flags & KVASER_USB_HYDRA_CF_FLAG_OVERRUN)
kvaser_usb_can_rx_over_error(priv->netdev);
 
if (flags & KVASER_USB_HYDRA_CF_FLAG_FDF) {
-   cf->len = can_dlc2len(get_canfd_dlc(dlc));
+   cf->len = can_dlc2len(dlc);
if (flags & KVASER_USB_HYDRA_CF_FLAG_BRS)
cf->flags |= CANFD_BRS;
if (flags & KVASER_USB_HYDRA_CF_FLAG_ESI)
cf->flags |= CANFD_ESI;
} else {
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c 
b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
index 1f08dd22b3d5..1233ef20646a 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
@@ -490,11 +490,11 @@ static int pcan_usb_fd_decode_canmsg(struct 
pcan_usb_fd_if *usb_if,
cfd->flags |= CANFD_BRS;
 
if (rx_msg_flags & PUCAN_MSG_ERROR_STATE_IND)
cfd->flags |= CANFD_ESI;
 
-   cfd->len = can_dlc2len(get_canfd_dlc(pucan_msg_get_dlc(rm)));
+   cfd->len = can_dlc2len(pucan_msg_get_dlc(rm));
} else {
/* CAN 2.0 frame case */
skb = alloc_can_skb(netdev, (struct can_frame **)&cfd);
if (!skb)
return -ENOMEM;
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 9bc84c6978ec..802606e36b58 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -103,11 +103,10 @@ static inline unsigned int can_bit_time(const struct 
can_bittiming *bt)
  *
  * To be us

[PATCH v5 6/8] can: update documentation for DLC usage in Classical CAN

2020-11-09 Thread Oliver Hartkopp
The extension of struct can_frame with the len8_dlc element and the
can_dlc naming issue required an update of the documentation.

Additionally introduce the term 'Classical CAN' which has been established
by CAN in Automation to separate the original CAN2.0 A/B from CAN FD.

Updated some data structures and flags.

Signed-off-by: Oliver Hartkopp 
---
 Documentation/networking/can.rst | 68 
 1 file changed, 52 insertions(+), 16 deletions(-)

diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst
index 4895b0dd2714..f8dae662e454 100644
--- a/Documentation/networking/can.rst
+++ b/Documentation/networking/can.rst
@@ -226,24 +226,40 @@ interface (which is different from TCP/IP due to 
different addressing
 the socket, you can read(2) and write(2) from/to the socket or use
 send(2), sendto(2), sendmsg(2) and the recv* counterpart operations
 on the socket as usual. There are also CAN specific socket options
 described below.
 
-The basic CAN frame structure and the sockaddr structure are defined
-in include/linux/can.h:
+The Classical CAN frame structure (aka CAN 2.0B), the CAN FD frame structure
+and the sockaddr structure are defined in include/linux/can.h:
 
 .. code-block:: C
 
 struct can_frame {
 canid_t can_id;  /* 32 bit CAN_ID + EFF/RTR/ERR flags */
-__u8can_dlc; /* frame payload length in byte (0 .. 8) */
+union {
+/* CAN frame payload length in byte (0 .. CAN_MAX_DLEN)
+ * was previously named can_dlc so we need to carry that
+ * name for legacy support
+ */
+__u8 len;
+__u8 can_dlc; /* deprecated */
+};
 __u8__pad;   /* padding */
 __u8__res0;  /* reserved / padding */
-__u8__res1;  /* reserved / padding */
+__u8len8_dlc; /* optional DLC for 8 byte payload length (9 .. 
15) */
 __u8data[8] __attribute__((aligned(8)));
 };
 
+Remark: The len element contains the payload length in bytes and should be
+used instead of can_dlc. The deprecated can_dlc was misleadingly named as
+it always contained the plain payload length in bytes and not the so called
+'data length code' (DLC).
+
+To pass the raw DLC from/to a Classical CAN network device the len8_dlc
+element can contain values 9 .. 15 when the len element is 8 (the real
+payload length for all DLC values greater than or equal to 8).
+
 The alignment of the (linear) payload data[] to a 64bit boundary
 allows the user to define their own structs and unions to easily access
 the CAN payload. There is no given byteorder on the CAN bus by
 default. A read(2) system call on a CAN_RAW socket transfers a
 struct can_frame to the user space.
@@ -258,10 +274,27 @@ PF_PACKET socket, that also binds to a specific interface:
 int can_ifindex;
 union {
 /* transport protocol class address info (e.g. ISOTP) */
 struct { canid_t rx_id, tx_id; } tp;
 
+/* J1939 address information */
+struct {
+/* 8 byte name when using dynamic addressing */
+__u64 name;
+
+/* pgn:
+ * 8 bit: PS in PDU2 case, else 0
+ * 8 bit: PF
+ * 1 bit: DP
+ * 1 bit: reserved
+ */
+__u32 pgn;
+
+/* 1 byte address */
+__u8 addr;
+} j1939;
+
 /* reserved for future CAN protocols address information */
 } can_addr;
 };
 
 To determine the interface index an appropriate ioctl() has to
@@ -369,11 +402,11 @@ bitrates for the arbitration phase and the payload phase 
of the CAN FD frame
 and up to 64 bytes of payload. This extended payload length breaks all the
 kernel interfaces (ABI) which heavily rely on the CAN frame with fixed eight
 bytes of payload (struct can_frame) like the CAN_RAW socket. Therefore e.g.
 the CAN_RAW socket supports a new socket option CAN_RAW_FD_FRAMES that
 switches the socket into a mode that allows the handling of CAN FD frames
-and (legacy) CAN frames simultaneously (see :ref:`socketcan-rawfd`).
+and Classical CAN frames simultaneously (see :ref:`socketcan-rawfd`).
 
 The struct canfd_frame is defined in include/linux/can.h:
 
 .. code-block:: C
 
@@ -395,21 +428,21 @@ all structure elements can be used as-is - only the 
data[] becomes extended.
 When introducing the struct canfd_frame it turned out that the data length
 code (DLC) of the struct can_frame was used as a length information as the
 length and the DLC has a 1:1 mapping in the range of 0 .. 8. To preserve
 the easy handling of the length 

[PATCH v5 8/8] can-dev: add len8_dlc support for various CAN USB adapters

2020-11-09 Thread Oliver Hartkopp
Support the Classical CAN raw DLC functionality to send and receive DLC
values from 9 .. 15 on various Classical CAN capable USB network drivers:

- gs_usb
- pcan_usb
- pcan_usb_fd
- usb_8dev

Tested-by: Oliver Hartkopp 
Signed-off-by: Oliver Hartkopp 
---
 drivers/net/can/usb/gs_usb.c   |  8 ++--
 drivers/net/can/usb/peak_usb/pcan_usb.c|  8 ++--
 drivers/net/can/usb/peak_usb/pcan_usb_fd.c | 17 -
 drivers/net/can/usb/usb_8dev.c |  9 ++---
 4 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 940589667a7f..cc0c30a5 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -330,10 +330,13 @@ static void gs_usb_receive_bulk_callback(struct urb *urb)
return;
 
cf->can_id = hf->can_id;
 
cf->len = can_cc_dlc2len(hf->len);
+   cf->len8_dlc = can_get_len8_dlc(dev->can.ctrlmode, cf->len,
+   hf->len);
+
memcpy(cf->data, hf->data, 8);
 
/* ERROR frames tell us information about the controller */
if (hf->can_id & CAN_ERR_FLAG)
gs_update_state(dev, cf);
@@ -502,11 +505,12 @@ static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb,
hf->channel = dev->channel;
 
cf = (struct can_frame *)skb->data;
 
hf->can_id = cf->can_id;
-   hf->len = cf->len;
+   hf->len = can_get_cc_dlc(dev->can.ctrlmode, cf->len, cf->len8_dlc);
+
memcpy(hf->data, cf->data, cf->len);
 
usb_fill_bulk_urb(urb, dev->udev,
  usb_sndbulkpipe(dev->udev, GSUSB_ENDPOINT_OUT),
  hf,
@@ -856,11 +860,11 @@ static struct gs_can *gs_make_candev(unsigned int channel,
dev->can.state = CAN_STATE_STOPPED;
dev->can.clock.freq = bt_const->fclk_can;
dev->can.bittiming_const = &dev->bt_const;
dev->can.do_set_bittiming = gs_usb_set_bittiming;
 
-   dev->can.ctrlmode_supported = 0;
+   dev->can.ctrlmode_supported = CAN_CTRLMODE_CC_LEN8_DLC;
 
if (bt_const->feature & GS_CAN_FEATURE_LISTEN_ONLY)
dev->can.ctrlmode_supported |= CAN_CTRLMODE_LISTENONLY;
 
if (bt_const->feature & GS_CAN_FEATURE_LOOP_BACK)
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c 
b/drivers/net/can/usb/peak_usb/pcan_usb.c
index ec34f87cc02c..5a8dffacc24e 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb.c
@@ -733,10 +733,12 @@ static int pcan_usb_decode_data(struct 
pcan_usb_msg_context *mc, u8 status_len)
 
cf->can_id = le16_to_cpu(tmp16) >> 5;
}
 
cf->len = can_cc_dlc2len(rec_len);
+   cf->len8_dlc = can_get_len8_dlc(mc->pdev->dev.can.ctrlmode, cf->len,
+   rec_len);
 
/* Only first packet timestamp is a word */
if (pcan_usb_decode_ts(mc, !mc->rec_ts_idx))
goto decode_failed;
 
@@ -836,11 +838,12 @@ static int pcan_usb_encode_msg(struct peak_usb_device 
*dev, struct sk_buff *skb,
obuf[1] = 1;
 
pc = obuf + PCAN_USB_MSG_HEADER_LEN;
 
/* status/len byte */
-   *pc = cf->len;
+   *pc = can_get_cc_dlc(dev->can.ctrlmode, cf->len, cf->len8_dlc);
+
if (cf->can_id & CAN_RTR_FLAG)
*pc |= PCAN_USB_STATUSLEN_RTR;
 
/* can id */
if (cf->can_id & CAN_EFF_FLAG) {
@@ -990,11 +993,12 @@ static const struct can_bittiming_const pcan_usb_const = {
 const struct peak_usb_adapter pcan_usb = {
.name = "PCAN-USB",
.device_id = PCAN_USB_PRODUCT_ID,
.ctrl_count = 1,
.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY |
- CAN_CTRLMODE_BERR_REPORTING,
+ CAN_CTRLMODE_BERR_REPORTING |
+ CAN_CTRLMODE_CC_LEN8_DLC,
.clock = {
.freq = PCAN_USB_CRYSTAL_HZ / 2 ,
},
.bittiming_const = &pcan_usb_const,
 
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c 
b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
index 761e78d8e647..8020071c9067 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
@@ -492,16 +492,21 @@ static int pcan_usb_fd_decode_canmsg(struct 
pcan_usb_fd_if *usb_if,
if (rx_msg_flags & PUCAN_MSG_ERROR_STATE_IND)
cfd->flags |= CANFD_ESI;
 
cfd->len = can_fd_dlc2len(pucan_msg_get_dlc(rm));
} else {
+   struct can_frame *cf;
+
/* CAN 2.0 frame case */
skb = alloc_can_skb(netdev, (struct can_frame **)&cfd);
if (!skb)
return -ENOMEM;
 
cfd->len = can_cc_dlc2len(pucan_msg_get_dlc(rm));
+   cf = (struct can_fram

[PATCH v5 2/8] can: rename get_can_dlc() macro with can_cc_dlc2len()

2020-11-09 Thread Oliver Hartkopp
The get_can_dlc() macro is used to ensure that the payload length information
of the Classical CAN frame is at most 8 bytes (CAN_MAX_DLEN).

Rename the macro and use the correct constant in preparation of the len/dlc
cleanup for Classical CAN frames.

Signed-off-by: Oliver Hartkopp 
---
 drivers/net/can/at91_can.c| 2 +-
 drivers/net/can/c_can/c_can.c | 2 +-
 drivers/net/can/cc770/cc770.c | 2 +-
 drivers/net/can/flexcan.c | 2 +-
 drivers/net/can/grcan.c   | 2 +-
 drivers/net/can/ifi_canfd/ifi_canfd.c | 2 +-
 drivers/net/can/janz-ican3.c  | 4 ++--
 drivers/net/can/m_can/m_can.c | 2 +-
 drivers/net/can/mscan/mscan.c | 2 +-
 drivers/net/can/pch_can.c | 4 ++--
 drivers/net/can/peak_canfd/peak_canfd.c   | 2 +-
 drivers/net/can/rcar/rcar_can.c   | 2 +-
 drivers/net/can/rcar/rcar_canfd.c | 4 ++--
 drivers/net/can/sja1000/sja1000.c | 2 +-
 drivers/net/can/softing/softing_main.c| 2 +-
 drivers/net/can/spi/hi311x.c  | 2 +-
 drivers/net/can/spi/mcp251x.c | 4 ++--
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c| 2 +-
 drivers/net/can/sun4i_can.c   | 2 +-
 drivers/net/can/ti_hecc.c | 2 +-
 drivers/net/can/usb/ems_usb.c | 2 +-
 drivers/net/can/usb/esd_usb2.c| 2 +-
 drivers/net/can/usb/gs_usb.c  | 2 +-
 drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 4 ++--
 drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c  | 4 ++--
 drivers/net/can/usb/mcba_usb.c| 2 +-
 drivers/net/can/usb/peak_usb/pcan_usb.c   | 2 +-
 drivers/net/can/usb/peak_usb/pcan_usb_fd.c| 2 +-
 drivers/net/can/usb/ucan.c| 8 
 drivers/net/can/usb/usb_8dev.c| 2 +-
 drivers/net/can/xilinx_can.c  | 4 ++--
 include/linux/can/dev.h   | 8 
 32 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c
index c14de95d2ca7..db06254f8eb7 100644
--- a/drivers/net/can/at91_can.c
+++ b/drivers/net/can/at91_can.c
@@ -578,11 +578,11 @@ static void at91_read_mb(struct net_device *dev, unsigned 
int mb,
cf->can_id = ((reg_mid >> 0) & CAN_EFF_MASK) | CAN_EFF_FLAG;
else
cf->can_id = (reg_mid >> 18) & CAN_SFF_MASK;
 
reg_msr = at91_read(priv, AT91_MSR(mb));
-   cf->can_dlc = get_can_dlc((reg_msr >> 16) & 0xf);
+   cf->can_dlc = can_cc_dlc2len((reg_msr >> 16) & 0xf);
 
if (reg_msr & AT91_MSR_MRTR)
cf->can_id |= CAN_RTR_FLAG;
else {
*(u32 *)(cf->data + 0) = at91_read(priv, AT91_MDL(mb));
diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index 1ccdbe89585b..56cc705959ea 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -395,11 +395,11 @@ static int c_can_read_msg_object(struct net_device *dev, 
int iface, u32 ctrl)
if (!skb) {
stats->rx_dropped++;
return -ENOMEM;
}
 
-   frame->can_dlc = get_can_dlc(ctrl & 0x0F);
+   frame->can_dlc = can_cc_dlc2len(ctrl & 0x0F);
 
arb = priv->read_reg32(priv, C_CAN_IFACE(ARB1_REG, iface));
 
if (arb & IF_ARB_MSGXTD)
frame->can_id = (arb & CAN_EFF_MASK) | CAN_EFF_FLAG;
diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c
index 07e2b8df5153..3fd2a276dd93 100644
--- a/drivers/net/can/cc770/cc770.c
+++ b/drivers/net/can/cc770/cc770.c
@@ -484,11 +484,11 @@ static void cc770_rx(struct net_device *dev, unsigned int 
mo, u8 ctrl1)
id |= cc770_read_reg(priv, msgobj[mo].id[0]) << 8;
id >>= 5;
}
 
cf->can_id = id;
-   cf->can_dlc = get_can_dlc((config & 0xf0) >> 4);
+   cf->can_dlc = can_cc_dlc2len((config & 0xf0) >> 4);
for (i = 0; i < cf->can_dlc; i++)
cf->data[i] = cc770_read_reg(priv, msgobj[mo].data[i]);
}
 
stats->rx_packets++;
diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 881799bd9c5e..d77276cd1d99 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -1001,11 +1001,11 @@ static struct sk_buff *flexcan_mailbox_read(struct 
can_rx_offload *offload,
cfd->len = can_dlc2len(get_canfd_dlc((reg_ctrl >> 16) & 0xf));
 
if (reg_ctrl & FLEXCAN_MB_CNT_BRS)
cfd->flags |= CANFD_BRS;
} else {
-   cfd->len = get_can_dlc((reg_ctrl >> 16) & 0xf);
+   cfd->len = can_cc_dlc2len((reg_ctrl >> 16) & 0xf);
 
   

[PATCH v5 1/8] can: add optional DLC element to Classical CAN frame structure

2020-11-09 Thread Oliver Hartkopp
ISO 11898-1 Chapter 8.4.2.3 defines a 4 bit data length code (DLC) table which
maps the DLC to the payload length of the CAN frame in bytes:

DLC  ->  payload length
0 .. 8   ->  0 .. 8
9 .. 15  ->  8

Although the DLC values 8 .. 15 in Classical CAN always result in a payload
length of 8 bytes these DLC values are transparently transmitted on the CAN
bus. As the struct can_frame only provides a 'len' element (formerly 'can_dlc')
which contains the plain payload length ( 0 .. 8 ) of the CAN frame, the raw
DLC is not visible to the application programmer, e.g. for testing use-cases.

To access the raw DLC values 9 .. 15 the len8_dlc element is introduced, which
is only valid when the payload length 'len' is 8 and the DLC is greater than 8.

The len8_dlc element is filled by the CAN interface driver and used for CAN
frame creation by the CAN driver when the CAN_CTRLMODE_CC_LEN8_DLC flag is
supported by the driver and enabled via netlink configuration interface.

Reported-by: Vincent Mailhol 
Signed-off-by: Oliver Hartkopp 
---
 include/uapi/linux/can.h | 38 
 include/uapi/linux/can/netlink.h |  1 +
 2 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h
index 6a6d2c7655ff..f75238ac6dce 100644
--- a/include/uapi/linux/can.h
+++ b/include/uapi/linux/can.h
@@ -82,34 +82,44 @@ typedef __u32 canid_t;
  */
 typedef __u32 can_err_mask_t;
 
 /* CAN payload length and DLC definitions according to ISO 11898-1 */
 #define CAN_MAX_DLC 8
+#define CAN_MAX_RAW_DLC 15
 #define CAN_MAX_DLEN 8
 
 /* CAN FD payload length and DLC definitions according to ISO 11898-7 */
 #define CANFD_MAX_DLC 15
 #define CANFD_MAX_DLEN 64
 
 /**
- * struct can_frame - basic CAN frame structure
- * @can_id:  CAN ID of the frame and CAN_*_FLAG flags, see canid_t definition
- * @can_dlc: frame payload length in byte (0 .. 8) aka data length code
- *   N.B. the DLC field from ISO 11898-1 Chapter 8.4.2.3 has a 1:1
- *   mapping of the 'data length code' to the real payload length
- * @__pad:   padding
- * @__res0:  reserved / padding
- * @__res1:  reserved / padding
- * @data:CAN frame payload (up to 8 byte)
+ * struct can_frame - Classical CAN frame structure (aka CAN 2.0B)
+ * @can_id:   CAN ID of the frame and CAN_*_FLAG flags, see canid_t definition
+ * @len:  CAN frame payload length in byte (0 .. 8)
+ * @can_dlc:  deprecated name for CAN frame payload length in byte (0 .. 8)
+ * @__pad:padding
+ * @__res0:   reserved / padding
+ * @len8_dlc: optional DLC value (9 .. 15) at 8 byte payload length
+ *len8_dlc contains values from 9 .. 15 when the payload length is
+ *8 bytes but the DLC value (see ISO 11898-1) is greater than 8.
+ *CAN_CTRLMODE_CC_LEN8_DLC flag has to be enabled in CAN driver.
+ * @data: CAN frame payload (up to 8 byte)
  */
 struct can_frame {
canid_t can_id;  /* 32 bit CAN_ID + EFF/RTR/ERR flags */
-   __u8can_dlc; /* frame payload length in byte (0 .. CAN_MAX_DLEN) */
-   __u8__pad;   /* padding */
-   __u8__res0;  /* reserved / padding */
-   __u8__res1;  /* reserved / padding */
-   __u8data[CAN_MAX_DLEN] __attribute__((aligned(8)));
+   union {
+   /* CAN frame payload length in byte (0 .. CAN_MAX_DLEN)
+* was previously named can_dlc so we need to carry that
+* name for legacy support
+*/
+   __u8 len;
+   __u8 can_dlc; /* deprecated */
+   };
+   __u8 __pad; /* padding */
+   __u8 __res0; /* reserved / padding */
+   __u8 len8_dlc; /* optional DLC for 8 byte payload length (9 .. 15) */
+   __u8 data[CAN_MAX_DLEN] __attribute__((aligned(8)));
 };
 
 /*
  * defined bits for canfd_frame.flags
  *
diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h
index 6f598b73839e..f730d443b918 100644
--- a/include/uapi/linux/can/netlink.h
+++ b/include/uapi/linux/can/netlink.h
@@ -98,10 +98,11 @@ struct can_ctrlmode {
 #define CAN_CTRLMODE_ONE_SHOT  0x08/* One-Shot mode */
 #define CAN_CTRLMODE_BERR_REPORTING0x10/* Bus-error reporting */
 #define CAN_CTRLMODE_FD0x20/* CAN FD mode */
 #define CAN_CTRLMODE_PRESUME_ACK   0x40/* Ignore missing CAN ACKs */
 #define CAN_CTRLMODE_FD_NON_ISO0x80/* CAN FD in non-ISO 
mode */
+#define CAN_CTRLMODE_CC_LEN8_DLC   0x100   /* Classic CAN DLC option */
 
 /*
  * CAN device statistics
  */
 struct can_device_stats {
-- 
2.28.0



[PATCH v5 0/8] Introduce optional DLC element for Classic CAN

2020-11-09 Thread Oliver Hartkopp
Introduce improved DLC handling for Classic CAN which introduces a new
element 'len8_dlc' to the struct can_frame and additionally rename
the 'can_dlc' element to 'len' as it represents a plain payload length.

Before implementing the CAN_CTRLMODE_CC_LEN8_DLC handling on driver level
this patch set cleans up and renames the relevant code.

No functional changes.

This patch set is based on kernel/git/netdev/net-next.git

Changes in v2:
  - rephrase commit message of patch 4 about can_dlc replacement

Changes in v3:
  - remove unnecessarily introduced u8 cast in flexcan.c

Changes in v4:
  - adopt phrasing suggestions from Vincent Mailhol
  - separate and extend CAN documentation (Documentation/networking/can.rst)
  - add new patches for len8_dlc handling for CAN drivers
  - add new helpers in include/linux/can/dev.h
  - add len8_dlc support for various CAN USB adapters as reference

Changes in v5:
  - rename CAN FD related can_len2dlc and can_dlc2len helpers so that they
fit to the renamed can_cc_dlc2len helper for Classical CAN
(suggested by Vincent Mailhol)

Oliver Hartkopp (8):
  can: add optional DLC element to Classical CAN frame structure
  can: rename get_can_dlc() macro with can_cc_dlc2len()
  can: remove obsolete get_canfd_dlc() macro
  can: replace can_dlc as variable/element for payload length
  can: rename CAN FD related can_len2dlc and can_dlc2len helpers
  can: update documentation for DLC usage in Classical CAN
  can-dev: introduce helpers to access Classical CAN DLC values
  can-dev: add len8_dlc support for various CAN USB adapters

 Documentation/networking/can.rst  | 70 ++-
 drivers/net/can/at91_can.c| 14 ++--
 drivers/net/can/c_can/c_can.c | 20 +++---
 drivers/net/can/cc770/cc770.c | 14 ++--
 drivers/net/can/dev.c | 16 ++---
 drivers/net/can/flexcan.c |  6 +-
 drivers/net/can/grcan.c   | 10 +--
 drivers/net/can/ifi_canfd/ifi_canfd.c | 10 +--
 drivers/net/can/janz-ican3.c  | 20 +++---
 drivers/net/can/kvaser_pciefd.c   | 10 +--
 drivers/net/can/m_can/m_can.c | 12 ++--
 drivers/net/can/mscan/mscan.c | 20 +++---
 drivers/net/can/pch_can.c | 14 ++--
 drivers/net/can/peak_canfd/peak_canfd.c   | 16 ++---
 drivers/net/can/rcar/rcar_can.c   | 14 ++--
 drivers/net/can/rcar/rcar_canfd.c | 12 ++--
 drivers/net/can/rx-offload.c  |  2 +-
 drivers/net/can/sja1000/sja1000.c | 10 +--
 drivers/net/can/slcan.c   | 32 -
 drivers/net/can/softing/softing_fw.c  |  2 +-
 drivers/net/can/softing/softing_main.c| 14 ++--
 drivers/net/can/spi/hi311x.c  | 20 +++---
 drivers/net/can/spi/mcp251x.c | 20 +++---
 .../net/can/spi/mcp251xfd/mcp251xfd-core.c| 10 +--
 drivers/net/can/sun4i_can.c   | 10 +--
 drivers/net/can/ti_hecc.c |  8 +--
 drivers/net/can/usb/ems_usb.c | 16 ++---
 drivers/net/can/usb/esd_usb2.c| 16 ++---
 drivers/net/can/usb/gs_usb.c  | 20 +++---
 .../net/can/usb/kvaser_usb/kvaser_usb_core.c  |  2 +-
 .../net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 24 +++
 .../net/can/usb/kvaser_usb/kvaser_usb_leaf.c  | 22 +++---
 drivers/net/can/usb/mcba_usb.c| 10 +--
 drivers/net/can/usb/peak_usb/pcan_usb.c   | 20 +++---
 drivers/net/can/usb/peak_usb/pcan_usb_fd.c| 29 +---
 drivers/net/can/usb/peak_usb/pcan_usb_pro.c   | 14 ++--
 drivers/net/can/usb/ucan.c| 20 +++---
 drivers/net/can/usb/usb_8dev.c| 21 +++---
 drivers/net/can/xilinx_can.c  | 16 ++---
 include/linux/can/dev.h   | 32 +++--
 include/linux/can/dev/peak_canfd.h|  2 +-
 include/uapi/linux/can.h  | 38 ++
 include/uapi/linux/can/netlink.h  |  1 +
 net/can/af_can.c  |  2 +-
 net/can/gw.c  |  2 +-
 net/can/j1939/main.c  |  4 +-
 46 files changed, 400 insertions(+), 317 deletions(-)

-- 
2.28.0



[PATCH v5 4/8] can: replace can_dlc as variable/element for payload length

2020-11-09 Thread Oliver Hartkopp
The naming of can_dlc as element of struct can_frame and also as variable
name is misleading as it claims to be a 'data length CODE' but in reality
it always was a plain data length.

With the introduction of a new 'len' element in struct can_frame we can now
remove can_dlc as name and make clear which of the former uses was a plain
length (-> 'len') or a data length code (-> 'dlc') value.

Signed-off-by: Oliver Hartkopp 
---
 drivers/net/can/at91_can.c| 14 
 drivers/net/can/c_can/c_can.c | 20 ++--
 drivers/net/can/cc770/cc770.c | 14 
 drivers/net/can/dev.c | 10 +++---
 drivers/net/can/grcan.c   | 10 +++---
 drivers/net/can/ifi_canfd/ifi_canfd.c |  4 +--
 drivers/net/can/janz-ican3.c  | 20 ++--
 drivers/net/can/kvaser_pciefd.c   |  4 +--
 drivers/net/can/m_can/m_can.c |  4 +--
 drivers/net/can/mscan/mscan.c | 20 ++--
 drivers/net/can/pch_can.c | 12 +++
 drivers/net/can/peak_canfd/peak_canfd.c   | 12 +++
 drivers/net/can/rcar/rcar_can.c   | 14 
 drivers/net/can/rcar/rcar_canfd.c |  4 +--
 drivers/net/can/rx-offload.c  |  2 +-
 drivers/net/can/sja1000/sja1000.c | 10 +++---
 drivers/net/can/slcan.c   | 32 +--
 drivers/net/can/softing/softing_fw.c  |  2 +-
 drivers/net/can/softing/softing_main.c| 14 
 drivers/net/can/spi/hi311x.c  | 20 ++--
 drivers/net/can/spi/mcp251x.c | 18 +--
 drivers/net/can/sun4i_can.c   | 10 +++---
 drivers/net/can/ti_hecc.c |  8 ++---
 drivers/net/can/usb/ems_usb.c | 16 +-
 drivers/net/can/usb/esd_usb2.c| 16 +-
 drivers/net/can/usb/gs_usb.c  | 14 
 .../net/can/usb/kvaser_usb/kvaser_usb_core.c  |  2 +-
 .../net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 16 +-
 .../net/can/usb/kvaser_usb/kvaser_usb_leaf.c  | 22 ++---
 drivers/net/can/usb/mcba_usb.c| 10 +++---
 drivers/net/can/usb/peak_usb/pcan_usb.c   | 14 
 drivers/net/can/usb/peak_usb/pcan_usb_fd.c| 10 +++---
 drivers/net/can/usb/peak_usb/pcan_usb_pro.c   | 14 
 drivers/net/can/usb/ucan.c| 14 
 drivers/net/can/usb/usb_8dev.c| 14 
 drivers/net/can/xilinx_can.c  | 10 +++---
 include/linux/can/dev.h   |  4 +--
 net/can/af_can.c  |  2 +-
 net/can/gw.c  |  2 +-
 net/can/j1939/main.c  |  4 +--
 40 files changed, 231 insertions(+), 231 deletions(-)

diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c
index db06254f8eb7..5284f0ab3b06 100644
--- a/drivers/net/can/at91_can.c
+++ b/drivers/net/can/at91_can.c
@@ -466,11 +466,11 @@ static netdev_tx_t at91_start_xmit(struct sk_buff *skb, 
struct net_device *dev)
netdev_err(dev, "BUG! TX buffer full when queue awake!\n");
return NETDEV_TX_BUSY;
}
reg_mid = at91_can_id_to_reg_mid(cf->can_id);
reg_mcr = ((cf->can_id & CAN_RTR_FLAG) ? AT91_MCR_MRTR : 0) |
-   (cf->can_dlc << 16) | AT91_MCR_MTCR;
+   (cf->len << 16) | AT91_MCR_MTCR;
 
/* disable MB while writing ID (see datasheet) */
set_mb_mode(priv, mb, AT91_MB_MODE_DISABLED);
at91_write(priv, AT91_MID(mb), reg_mid);
set_mb_mode_prio(priv, mb, AT91_MB_MODE_TX, prio);
@@ -479,11 +479,11 @@ static netdev_tx_t at91_start_xmit(struct sk_buff *skb, 
struct net_device *dev)
at91_write(priv, AT91_MDH(mb), *(u32 *)(cf->data + 4));
 
/* This triggers transmission */
at91_write(priv, AT91_MCR(mb), reg_mcr);
 
-   stats->tx_bytes += cf->can_dlc;
+   stats->tx_bytes += cf->len;
 
/* _NOTE_: subtract AT91_MB_TX_FIRST offset from mb! */
can_put_echo_skb(skb, dev, mb - get_mb_tx_first(priv));
 
/*
@@ -552,11 +552,11 @@ static void at91_rx_overflow_err(struct net_device *dev)
 
cf->can_id |= CAN_ERR_CRTL;
cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW;
 
stats->rx_packets++;
-   stats->rx_bytes += cf->can_dlc;
+   stats->rx_bytes += cf->len;
netif_receive_skb(skb);
 }
 
 /**
  * at91_read_mb - read CAN msg from mailbox (lowlevel impl)
@@ -578,11 +578,11 @@ static void at91_read_mb(struct net_device *dev, unsigned 
int mb,
cf->can_id = ((reg_mid >> 0) & CAN_EFF_MASK) | CAN_EFF_FLAG;
else
cf->can_id = (reg_mid >> 18) & CAN_SFF_MASK;
 
reg_msr = at91_read(priv, AT91_MSR(mb));
-   cf->can_dlc = can_cc_dlc2len((reg_msr >> 16) & 0xf);
+   cf->len = can_cc_dlc2len((reg_msr >> 16) & 0x

Re: [PATCH v4 4/7] can: replace can_dlc as variable/element for payload length

2020-11-09 Thread Oliver Hartkopp

Hi Vincent,

On 09.11.20 13:59, Vincent MAILHOL wrote:

On Mon. 9 Nov 2020 at 19:26, Oliver Hartkopp wrote:

diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index b2e8df8e4cb0..72671184a7a2 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -183,12 +183,12 @@ static inline void can_set_static_ctrlmode(struct 
net_device *dev,
 /* override MTU which was set by default in can_setup()? */
 if (static_mode & CAN_CTRLMODE_FD)
 dev->mtu = CANFD_MTU;
  }

-/* get data length from can_dlc with sanitized can_dlc */
-u8 can_dlc2len(u8 can_dlc);
+/* get data length from raw data length code (DLC) */


/*
  * convert a given data length code (dlc) of an FD CAN frame into a
  * valid data length of max. 64 bytes.
  */

I missed this point during my previous review: the can_dlc2len() function
is only valid for CAN FD frames. Comments should reflect this fact.


+u8 can_dlc2len(u8 dlc);


Concerning the name:
  * can_get_cc_len() converts a Classical CAN frame DLC into a data
length.
  * can_dlc2len() converts an FD CAN frame DLC into a data length.

Just realized that both macro/function do similar things so we could
think of a similar naming as well.
  * Example 1: can_get_cc_len() and can_get_fd_len()
  * Example 2: can_cc_dlc2len() and can_fd_dlc2len()


I like!

Patch set v5 is out now.

Thanks,
Oliver



Or we could simply leave things as they are, this is not a big issue
as long as the comments clearly state which one is for classical
frames and which one is for FD frames.



  /* map the sanitized data length to an appropriate data length code */
  u8 can_len2dlc(u8 len);


can_len2dlc() might be renamed (e.g. can_get_fd_dlc()) if Example 1
solution is chosen.


  struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int 
echo_skb_max,


Yours sincerely,
Vincent Mailhol



Re: [PATCH] net: tcp: ratelimit warnings in tcp_recvmsg

2020-11-09 Thread Eric Dumazet



On 11/9/20 3:48 PM, Menglong Dong wrote:
> On Mon, Nov 9, 2020 at 9:36 PM Eric Dumazet  wrote:
>>
>> I do not think this patch is useful. That is simply code churn.
>>
>> Can you trigger the WARN() in the latest upstream version ?
>> If yes this is a serious bug that needs urgent attention.
>>
>> Make sure you have backported all needed fixes into your kernel, if
>> you get this warning on a non pristine kernel.
> 
> Theoretically, this WARN() shouldn't be triggered in any branches.
> Somehow, it just happened in kernel v3.10. This really confused me. I
> wasn't able to keep tracing it, as it is a product environment.
> 
> I notice that the code for tcp skb receiving didn't change much
> between v3.10 and the latest upstream version, so I guess the WARN()
> can be triggered in the latest version too.
> 
> If something is fixed and this WARN() won't be triggered, just ignore me.
> 

Yes, I confirm this WARN() should not trigger.

The bug is not in tcp recvmsg(); that is why you do not see an obvious
fix for this issue in 3.10.



Re: [PATCH net-next 00/18] nexthop: Add support for nexthop objects offload

2020-11-09 Thread Ido Schimmel
On Fri, Nov 06, 2020 at 11:31:59AM -0800, Jakub Kicinski wrote:
> On Wed,  4 Nov 2020 15:30:22 +0200 Ido Schimmel wrote:
> > From: Ido Schimmel 
> > 
> > This patch set adds support for nexthop objects offload with a dummy
> > implementation over netdevsim. mlxsw support will be added later.
> > 
> > The general idea is very similar to route offload in that notifications
> > are sent whenever nexthop objects are changed. A listener can veto the
> > change and the error will be communicated to user space with extack.
> > 
> > To keep listeners as simple as possible, they not only receive
> > notifications for the nexthop object that is changed, but also for all
> > the other objects affected by this change. For example, when a single
> > nexthop is replaced, a replace notification is sent for the single
> > nexthop, but also for all the nexthop groups this nexthop is member in.
> > This relieves listeners from the need to track such dependencies.
> > 
> > To simplify things further for listeners, the notification info does not
> > contain the raw nexthop data structures (e.g., 'struct nexthop'), but
> > less complex data structures into which the raw data structures are
> > parsed into.
> 
> Applied, thank you!

Great, thank you. And thanks David for the awesome work on the nexthop
infrastructure.

> 
> BTW no need to follow up on my else-after-return comment, 
> just something to keep in mind.

Ack


Re: [PATCH] netfilter: conntrack: fix -Wformat

2020-11-09 Thread kernel test robot
Hi Nick,

I love your patch! Perhaps something to improve:

[auto build test WARNING on nf-next/master]
[also build test WARNING on nf/master ipvs/master v5.10-rc3 next-20201109]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Nick-Desaulniers/netfilter-conntrack-fix-Wformat/20201109-085104
base:   https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git master
config: riscv-randconfig-s031-20201109 (attached as .config)
compiler: riscv32-linux-gcc (GCC) 9.3.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# apt-get install sparse
# sparse version: v0.6.3-76-gf680124b-dirty
# 
https://github.com/0day-ci/linux/commit/407a53117fa32f8f17a73a51bced0e85f168acb4
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Nick-Desaulniers/netfilter-conntrack-fix-Wformat/20201109-085104
git checkout 407a53117fa32f8f17a73a51bced0e85f168acb4
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 
CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=riscv 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 


"sparse warnings: (new ones prefixed by >>)"
>> net/netfilter/nf_conntrack_standalone.c:56:29: sparse: sparse: cast to 
>> restricted __be16
   net/netfilter/nf_conntrack_standalone.c:60:29: sparse: sparse: cast to 
restricted __be16
   net/netfilter/nf_conntrack_standalone.c:61:29: sparse: sparse: cast to 
restricted __be16
   net/netfilter/nf_conntrack_standalone.c:66:29: sparse: sparse: cast to 
restricted __be16
   net/netfilter/nf_conntrack_standalone.c:67:29: sparse: sparse: cast to 
restricted __be16
   net/netfilter/nf_conntrack_standalone.c:72:29: sparse: sparse: cast to 
restricted __be16
   net/netfilter/nf_conntrack_standalone.c:73:29: sparse: sparse: cast to 
restricted __be16
   net/netfilter/nf_conntrack_standalone.c:77:29: sparse: sparse: cast to 
restricted __be16
   net/netfilter/nf_conntrack_standalone.c:78:29: sparse: sparse: cast to 
restricted __be16
   net/netfilter/nf_conntrack_standalone.c:84:29: sparse: sparse: cast to 
restricted __be16

vim +56 net/netfilter/nf_conntrack_standalone.c

32  
33  #ifdef CONFIG_NF_CONNTRACK_PROCFS
34  void
35  print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
36  const struct nf_conntrack_l4proto *l4proto)
37  {
38  switch (tuple->src.l3num) {
39  case NFPROTO_IPV4:
40  seq_printf(s, "src=%pI4 dst=%pI4 ",
41 &tuple->src.u3.ip, &tuple->dst.u3.ip);
42  break;
43  case NFPROTO_IPV6:
44  seq_printf(s, "src=%pI6 dst=%pI6 ",
45 tuple->src.u3.ip6, tuple->dst.u3.ip6);
46  break;
47  default:
48  break;
49  }
50  
51  switch (l4proto->l4proto) {
52  case IPPROTO_ICMP:
53  seq_printf(s, "type=%u code=%u id=%hu ",
54 tuple->dst.u.icmp.type,
55 tuple->dst.u.icmp.code,
  > 56 (__be16)ntohs(tuple->src.u.icmp.id));
57  break;
58  case IPPROTO_TCP:
59  seq_printf(s, "sport=%hu dport=%hu ",
60 (__be16)ntohs(tuple->src.u.tcp.port),
61 (__be16)ntohs(tuple->dst.u.tcp.port));
62  break;
63  case IPPROTO_UDPLITE:
64  case IPPROTO_UDP:
65  seq_printf(s, "sport=%hu dport=%hu ",
66 (__be16)ntohs(tuple->src.u.udp.port),
67 (__be16)ntohs(tuple->dst.u.udp.port));
68  
69  break;
70  case IPPROTO_DCCP:
71  seq_printf(s, "sport=%hu dport=%hu ",
72 (__be16)ntohs(tuple->src.u.dccp.port),
73 (__be16)ntohs(tuple->dst.u.dccp.port));
74  break;
75  case IPPROTO_SCTP:
76  seq_printf(s, "sport=%hu dport=%hu ",
77 (__be16)ntohs(tuple->src.u.sctp.port),
78 (__be16)ntohs(tuple->dst.u.sctp.port));
79

[PATCH net-next] net: phy: aquantia: do not return an error on clearing pending IRQs

2020-11-09 Thread Ioana Ciornei
From: Ioana Ciornei 

The referenced commit added code to .config_intr() which, upon
configuration of the IRQ state, also clears any pending IRQ. If
there are actually pending IRQs, a read of the IRQ status register will
return something non-zero. This should not result in the callback
returning an error.

Fix this by returning an error only when the result of the
phy_read_mmd() is negative.

Fixes: e11ef96d44f1 ("net: phy: aquantia: remove the use of .ack_interrupt()")
Signed-off-by: Ioana Ciornei 
---
 drivers/net/phy/aquantia_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia_main.c
index 345f70f9d39b..968dd43a2b1e 100644
--- a/drivers/net/phy/aquantia_main.c
+++ b/drivers/net/phy/aquantia_main.c
@@ -250,7 +250,7 @@ static int aqr_config_intr(struct phy_device *phydev)
if (en) {
/* Clear any pending interrupts before enabling them */
err = phy_read_mmd(phydev, MDIO_MMD_AN, 
MDIO_AN_TX_VEND_INT_STATUS2);
-   if (err)
+   if (err < 0)
return err;
}
 
@@ -273,7 +273,7 @@ static int aqr_config_intr(struct phy_device *phydev)
if (!en) {
/* Clear any pending interrupts after we have disabled them */
err = phy_read_mmd(phydev, MDIO_MMD_AN, 
MDIO_AN_TX_VEND_INT_STATUS2);
-   if (err)
+   if (err < 0)
return err;
}
 
-- 
2.28.0



Re: [PATCH v5 net-next 3/3] net/sched: act_frag: add implict packet fragment support.

2020-11-09 Thread Vlad Buslov


On Mon 09 Nov 2020 at 16:50, Marcelo Ricardo Leitner 
 wrote:
> On Mon, Nov 09, 2020 at 03:24:37PM +0200, Vlad Buslov wrote:
>> On Sun 08 Nov 2020 at 01:30, we...@ucloud.cn wrote:
> ...
>> > @@ -974,9 +974,22 @@ config NET_ACT_TUNNEL_KEY
>> >  To compile this code as a module, choose M here: the
>> >  module will be called act_tunnel_key.
>> >  
>> > +config NET_ACT_FRAG
>> > +  tristate "Packet fragmentation"
>> > +  depends on NET_CLS_ACT
>> > +  help
>> > + Say Y here to allow fragmenting big packets when outputting
>> > + with the mirred action.
>> > +
>> > +If unsure, say N.
>> > +
>> > +To compile this code as a module, choose M here: the
>> > +module will be called act_frag.
>> > +
>> 
>> Just wondering, what is the motivation for putting the frag code into
>> standalone module? It doesn't implement usual act_* interface and is not
>> user-configurable. To me it looks like functionality that belongs to
>> act_api. Am I missing something?
>
> It's the way we found so far for not "polluting" mirred/tc with L3
> functionality, per Cong's feedbacks on previous attempts. As for why
> not act_api, this is not some code that other actions can just re-use
> and that file is already quite big, so I thought act_frag would be
> better to keep it isolated/contained.

Hmmm okay.

>
> If act_frag is confusing, then maybe act_mirred_frag? It is a mirred
> plugin now, after all.

Would be even more confusing to me since the act_frag module code is
only directly accessed from act_ct and not act_mirred :)

Anyway, I don't have a strong opinion regarding this. Just wanted to
understand the motivation.

>
> ...
>> > +int tcf_set_xmit_hook(int (*xmit_hook)(struct sk_buff *skb,
>> > + int (*xmit)(struct sk_buff *skb)))
>> > +{
>> > +  if (!tcf_xmit_hook_enabled())
>> > +  xchg(&tcf_xmit_hook, xmit_hook);
>> 
>> Marcelo, why did you suggest to use atomic operations to change
>> tcf_xmit_hook variable? It is not obvious to me after reading the code.
>
> I thought as a minimal way to not have problems on module removal, but
> your comment below proves it is not right/enough. :-)
>
>> 
>> > +  else if (xmit_hook != tcf_xmit_hook)
>> > +  return -EBUSY;
>> > +
>> > +  tcf_inc_xmit_hook();
>> > +
>> > +  return 0;
>> > +}
>> > +EXPORT_SYMBOL_GPL(tcf_set_xmit_hook);
>> > +
>> > +void tcf_clear_xmit_hook(void)
>> > +{
>> > +  tcf_dec_xmit_hook();
>> > +
>> > +  if (!tcf_xmit_hook_enabled())
>> > +  xchg(&tcf_xmit_hook, NULL);
>> > +}
>> > +EXPORT_SYMBOL_GPL(tcf_clear_xmit_hook);
>> > +
>> > +int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff 
>> > *skb))
>> > +{
>> > +  if (tcf_xmit_hook_enabled())
>> 
>> Okay, so what happens here if tcf_xmit_hook is disabled concurrently? If
>> we get here from some rule that doesn't involve act_ct but uses
>> act_mirred and act_ct is concurrently removed decrementing last
>> reference to static branch and setting tcf_xmit_hook to NULL?
>
> Yeah.. good point. Thinking further now, what about using RCU for the
> hook? AFAICT it can cover the synchronization needed when clearing the
> pointer, tcf_set_xmit_hook() should do a module_get() and
> tcf_clear_xmit_hook() can delay a module_put(act_frag) as needed with
> call_rcu.

Wouldn't it be enough to just call synchronize_rcu() in
tcf_clear_xmit_hook() after setting tcf_xmit_hook to NULL? act_ct module
removal should be very rare, so synchronously waiting for rcu grace
period to complete is probably okay.

>
> I see tcf_mirred_act is already calling rcu_dereference_bh(), so
> it's already protected by rcu read here and calling tcf_xmit_hook()
> with xmit pointer should be fine. WDYT?

Yes, good idea.

>
>> 
>> > +  return tcf_xmit_hook(skb, xmit);
>> > +  else
>> > +  return xmit(skb);
>> > +}
>> > +EXPORT_SYMBOL_GPL(tcf_dev_queue_xmit);



答复: [PATCH v2 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter

2020-11-09 Thread zhangqilong
> operation to deal with usage counter
> 
> On Mon, Nov 9, 2020 at 4:00 PM Zhang Qilong 
> wrote:
> >
> > In many case, we need to check return value of pm_runtime_get_sync,
> > but it brings a trouble to the usage counter processing. Many callers
> > forget to decrease the usage counter when it failed. It has been
> > discussed a lot[0][1]. So we add a function to deal with the usage
> > counter for better coding.
> >
> > [0]https://lkml.org/lkml/2020/6/14/88
> > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/202005200951
> > 48.10995-1-dinghao@zju.edu.cn/
> > Signed-off-by: Zhang Qilong 
> > ---
> >  include/linux/pm_runtime.h | 30 ++
> >  1 file changed, 30 insertions(+)
> >
> > diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
> > index 4b708f4e8eed..6549ce764400 100644
> > --- a/include/linux/pm_runtime.h
> > +++ b/include/linux/pm_runtime.h
> > @@ -386,6 +386,36 @@ static inline int pm_runtime_get_sync(struct device
> *dev)
> > return __pm_runtime_resume(dev, RPM_GET_PUT);  }
> >
> > +/**
> > + * pm_runtime_general_get - Bump up usage counter of a device and
> resume it.
> > + * @dev: Target device.
> > + *
> > + * Increase runtime PM usage counter of @dev first, and carry out
> > +runtime-resume
> > + * of it synchronously. If __pm_runtime_resume return negative
> > +value(device is in
> > + * error state), we to need decrease the usage counter before it
> > +return. If
> > + * __pm_runtime_resume return positive value, it means the runtime of
> > +device has
> > + * already been in active state, and we let the new wrapper return zero
> instead.
> > + *
> > + * The possible return values of this function is zero or negative value.
> > + * zero:
> > + *- it means resume succeeed or runtime of device has already been
> active, the
> > + *  runtime PM usage counter of @dev remains incremented.
> > + * negative:
> > + *- it means failure and the runtime PM usage counter of @dev has
> been balanced.
> 
> The kerneldoc above is kind of noisy and it is hard to figure out what the 
> helper
> really does from it.
> 
> You could basically say something like "Resume @dev synchronously and if that
> is successful, increment its runtime PM usage counter.  Return
> 0 if the runtime PM usage counter of @dev has been incremented or a negative
> error code otherwise."
> 

How about the following description.
/**
390  * pm_runtime_general_get - Bump up usage counter of a device and resume it.
391  * @dev: Target device.
392  *
393  * Increase runtime PM usage counter of @dev first, and carry out 
runtime-resume
394  * of it synchronously. If __pm_runtime_resume return negative value(device 
is in
395  * error state), we need to decrease the usage counter before it returns. If
396  * __pm_runtime_resume return positive value, it means the runtime of 
device has
397  * already been in active state, and we let the new wrapper return zero 
instead.
398  *
399  * Resume @dev synchronously and if that is successful, and increment its 
runtime
400  * PM usage counter if it turn out to equal to 0. The runtime PM usage 
counter of
401  * @dev has been incremented or a negative error code otherwise.
402  */

Thanks,
Zhang

> > + */
> > +static inline int pm_runtime_general_get(struct device *dev)
> 
> What about pm_runtime_resume_and_get()?
> 

I think it's OK.

> > +{
> > +   int ret = 0;
> 
> This extra initialization is not necessary.
> 
> You can initialize ret to the __pm_runtime_resume() return value right away.
> 

OK, good idea.

> > +
> > +   ret = __pm_runtime_resume(dev, RPM_GET_PUT);
> > +   if (ret < 0) {
> > +   pm_runtime_put_noidle(dev);
> > +   return ret;
> > +   }
> > +
> > +   return 0;
> > +}
> > +
> >  /**
> >   * pm_runtime_put - Drop device usage counter and queue up "idle check"
> if 0.
> >   * @dev: Target device.
> > --


Re: [PATCH v2 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter

2020-11-09 Thread Ulf Hansson
On Mon, 9 Nov 2020 at 16:20, Rafael J. Wysocki  wrote:
>
> On Mon, Nov 9, 2020 at 4:00 PM Zhang Qilong  wrote:
> >
> > In many case, we need to check return value of pm_runtime_get_sync, but
> > it brings a trouble to the usage counter processing. Many callers forget
> > to decrease the usage counter when it failed. It has been discussed a
> > lot[0][1]. So we add a function to deal with the usage counter for better
> > coding.
> >
> > [0]https://lkml.org/lkml/2020/6/14/88
> > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/20200520095148.10995-1-dinghao@zju.edu.cn/
> > Signed-off-by: Zhang Qilong 
> > ---
> >  include/linux/pm_runtime.h | 30 ++
> >  1 file changed, 30 insertions(+)
> >
> > diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
> > index 4b708f4e8eed..6549ce764400 100644
> > --- a/include/linux/pm_runtime.h
> > +++ b/include/linux/pm_runtime.h
> > @@ -386,6 +386,36 @@ static inline int pm_runtime_get_sync(struct device 
> > *dev)
> > return __pm_runtime_resume(dev, RPM_GET_PUT);
> >  }
> >
> > +/**
> > + * pm_runtime_general_get - Bump up usage counter of a device and resume 
> > it.
> > + * @dev: Target device.
> > + *
> > + * Increase runtime PM usage counter of @dev first, and carry out 
> > runtime-resume
> > + * of it synchronously. If __pm_runtime_resume return negative 
> > value(device is in
> > + * error state), we need to decrease the usage counter before it returns. If
> > + * __pm_runtime_resume return positive value, it means the runtime of 
> > device has
> > + * already been in active state, and we let the new wrapper return zero 
> > instead.
> > + *
> > + * The possible return values of this function is zero or negative value.
> > + * zero:
> > + *- it means resume succeeed or runtime of device has already been 
> > active, the
> > + *  runtime PM usage counter of @dev remains incremented.
> > + * negative:
> > + *- it means failure and the runtime PM usage counter of @dev has been 
> > balanced.
>
> The kerneldoc above is kind of noisy and it is hard to figure out what
> the helper really does from it.
>
> You could basically say something like "Resume @dev synchronously and
> if that is successful, increment its runtime PM usage counter.  Return
> 0 if the runtime PM usage counter of @dev has been incremented or a
> negative error code otherwise."
>
> > + */
> > +static inline int pm_runtime_general_get(struct device *dev)
>
> What about pm_runtime_resume_and_get()?

We already have pm_runtime_get_if_active() - so perhaps
pm_runtime_get_if_suspended() could be an option as well?

>
> > +{
> > +   int ret = 0;
>
> This extra initialization is not necessary.
>
> You can initialize ret to the __pm_runtime_resume() return value right away.
>
> > +
> > +   ret = __pm_runtime_resume(dev, RPM_GET_PUT);
> > +   if (ret < 0) {
> > +   pm_runtime_put_noidle(dev);
> > +   return ret;
> > +   }
> > +
> > +   return 0;
> > +}
> > +
> >  /**
> >   * pm_runtime_put - Drop device usage counter and queue up "idle check" if 
> > 0.
> >   * @dev: Target device.
> > --

Kind regards
Uffe


Re: [PATCH v5 net-next 3/3] net/sched: act_frag: add implict packet fragment support.

2020-11-09 Thread Vlad Buslov


On Mon 09 Nov 2020 at 16:54, wenxu  wrote:
> 在 2020/11/9 21:24, Vlad Buslov 写道:
>> On Sun 08 Nov 2020 at 01:30, we...@ucloud.cn wrote:
>>> From: wenxu 
>>>
>>> Currently the kernel tc subsystem can do conntrack in act_ct. But when several
>>> fragment packets go through act_ct, function tcf_ct_handle_fragments
>>> will defrag the packets into a big one. The last action may then redirect
>>> (mirred) to a device, and the reassembled big packet may exceed the MTU
>>> of the target device.
>>>
>>> This patch adds support for an xmit hook to mirred, that gets executed before
>>> xmitting the packet. Then, when act_ct gets loaded, it configures that hook.
>>> The frag xmit hook may be reused by other modules.
>>>
>>> Signed-off-by: wenxu 
>>> ---
>>> v2: Fix the crash for act_frag module without load
>>> v3: modify the kconfig describe and put tcf_xmit_hook_is_enabled
>>> in the tcf_dev_queue_xmit, and xchg atomic for tcf_xmit_hook
>>> v4: using skb_protocol and fix line length exceeds 80 columns
>>> v5: no change
>>>
>>>  include/net/act_api.h  |  16 +
>>>  net/sched/Kconfig  |  13 
>>>  net/sched/Makefile |   1 +
>>>  net/sched/act_api.c|  51 +++
>>>  net/sched/act_ct.c |   7 +++
>>>  net/sched/act_frag.c   | 164 
>>> +
>>>  net/sched/act_mirred.c |   2 +-
>>>  7 files changed, 253 insertions(+), 1 deletion(-)
>>>  create mode 100644 net/sched/act_frag.c
>>>
>>> diff --git a/include/net/act_api.h b/include/net/act_api.h
>>> index 8721492..403a618 100644
>>> --- a/include/net/act_api.h
>>> +++ b/include/net/act_api.h
>>> @@ -239,6 +239,22 @@ int tcf_action_check_ctrlact(int action, struct 
>>> tcf_proto *tp,
>>>  struct netlink_ext_ack *newchain);
>>>  struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action,
>>>  struct tcf_chain *newchain);
>>> +
>>> +int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff 
>>> *skb));
>>> +int tcf_set_xmit_hook(int (*xmit_hook)(struct sk_buff *skb,
>>> +  int (*xmit)(struct sk_buff *skb)));
>>> +void tcf_clear_xmit_hook(void);
>>> +
>>> +#if IS_ENABLED(CONFIG_NET_ACT_FRAG)
>>> +int tcf_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff 
>>> *skb));
>>> +#else
>>> +static inline int tcf_frag_xmit_hook(struct sk_buff *skb,
>>> +int (*xmit)(struct sk_buff *skb))
>>> +{
>>> +   return 0;
>>> +}
>>> +#endif
>>> +
>>>  #endif /* CONFIG_NET_CLS_ACT */
>>>  
>>>  static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
>>> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
>>> index a3b37d8..9a240c7 100644
>>> --- a/net/sched/Kconfig
>>> +++ b/net/sched/Kconfig
>>> @@ -974,9 +974,22 @@ config NET_ACT_TUNNEL_KEY
>>>   To compile this code as a module, choose M here: the
>>>   module will be called act_tunnel_key.
>>>  
>>> +config NET_ACT_FRAG
>>> +   tristate "Packet fragmentation"
>>> +   depends on NET_CLS_ACT
>>> +   help
>>> + Say Y here to allow fragmenting big packets when outputting
>>> + with the mirred action.
>>> +
>>> + If unsure, say N.
>>> +
>>> + To compile this code as a module, choose M here: the
>>> + module will be called act_frag.
>>> +
>> Just wondering, what is the motivation for putting the frag code into
>> standalone module? It doesn't implement usual act_* interface and is not
>> user-configurable. To me it looks like functionality that belongs to
>> act_api. Am I missing something?
>
> The fragment operation is a single L3 action.
>
> So we put it in a single module. Maybe it is not proper to put it in act_api
> directly.

Okay. I have no strong opinion regarding this so it is better for
maintainers to decide.

>
>>>  config NET_ACT_CT
>>> tristate "connection tracking tc action"
>>> depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE
>>> +   depends on NET_ACT_FRAG
>>> help
>>>   Say Y here to allow sending the packets to conntrack module.
>>>  
>>> diff --git a/net/sched/Makefile b/net/sched/Makefile
>>> index 66bbf9a..c146186 100644
>>> --- a/net/sched/Makefile
>>> +++ b/net/sched/Makefile
>>> @@ -29,6 +29,7 @@ obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o
>>>  obj-$(CONFIG_NET_IFE_SKBPRIO)  += act_meta_skbprio.o
>>>  obj-$(CONFIG_NET_IFE_SKBTCINDEX)   += act_meta_skbtcindex.o
>>>  obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
>>> +obj-$(CONFIG_NET_ACT_FRAG) += act_frag.o
>>>  obj-$(CONFIG_NET_ACT_CT)   += act_ct.o
>>>  obj-$(CONFIG_NET_ACT_GATE) += act_gate.o
>>>  obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
>>> diff --git a/net/sched/act_api.c b/net/sched/act_api.c
>>> index f66417d..e7b501c 100644
>>> --- a/net/sched/act_api.c
>>> +++ b/net/sched/act_api.c
>>> @@ -22,6 +22,57 @@
>>>  #include 
>>>  #include 
>>>  
>>> +static int (*tcf_xmit_hook)(struct sk_buff *skb,
>>> +   int (*xmit)(struct 

Re: [PATCH v4 5/7] can: update documentation for DLC usage in Classical CAN

2020-11-09 Thread Oliver Hartkopp




On 09.11.20 15:50, Vincent MAILHOL wrote:

On Mon. 9 Nov 2020 at 19:26, Oliver Hartkopp wrote:


The extension of struct can_frame with the len8_dlc element and the
can_dlc naming issue required an update of the documentation.

Additionally introduce the term 'Classical CAN' which has been established
by CAN in Automation to separate the original CAN2.0 A/B from CAN FD.

Updated some data structures and flags.

Signed-off-by: Oliver Hartkopp 
---
  Documentation/networking/can.rst | 68 
  1 file changed, 52 insertions(+), 16 deletions(-)

diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst
index ff05cbd05e0d..e17c6427bb3a 100644
--- a/Documentation/networking/can.rst
+++ b/Documentation/networking/can.rst
@@ -226,24 +226,40 @@ interface (which is different from TCP/IP due to 
different addressing
  the socket, you can read(2) and write(2) from/to the socket or use
  send(2), sendto(2), sendmsg(2) and the recv* counterpart operations
  on the socket as usual. There are also CAN specific socket options
  described below.

-The basic CAN frame structure and the sockaddr structure are defined
-in include/linux/can.h:
+The Classical CAN frame structure (aka CAN 2.0B), the CAN FD frame structure
+and the sockaddr structure are defined in include/linux/can.h:

  .. code-block:: C

  struct can_frame {
  canid_t can_id;  /* 32 bit CAN_ID + EFF/RTR/ERR flags */
-__u8can_dlc; /* frame payload length in byte (0 .. 8) */
+union {
+/* CAN frame payload length in byte (0 .. CAN_MAX_DLEN)
+ * was previously named can_dlc so we need to carry that
+ * name for legacy support
+ */
+__u8 len;
+__u8 can_dlc; /* deprecated */
+};
  __u8__pad;   /* padding */
  __u8__res0;  /* reserved / padding */
-__u8__res1;  /* reserved / padding */
+__u8len8_dlc; /* optional DLC for 8 byte payload length (9 .. 
15) */
  __u8data[8] __attribute__((aligned(8)));
  };

+Remark: The len element contains the payload length in bytes and should be
+used instead of can_dlc. The deprecated can_dlc was misleadingly named as
+it always contained the plain payload length in bytes and not the so called
+'data length code' (DLC).
+
+To pass the raw DLC from/to a Classical CAN network device the len8_dlc
+element can contain values 9 .. 15 when the len element is 8 (the real
+payload length for all DLC values greater or equal to 8).


The "Classical CAN network device" part could make the reader
misunderstand that FD capable controllers cannot handle Classical CAN
frames with DLC greater than 8. All the CAN-FD controllers I am aware
of can emit both Classical and FD frames. Conversely, some
Classical CAN controllers might not support sending DLCs greater than
8. I propose adding the nuance that this depends on the device property:

  +To pass the raw DLC from/to a capable network device
  +(c.f. cc-len8-dlc CAN device property), the len8_dlc element can
  +contain values 9 .. 15 when the len element is 8 (the real payload
  +length for all DLC values greater or equal to 8).



This section only describes the Classical CAN data structure. I also 
thought about it - but I did not want to overload it with device properties.



+
  The alignment of the (linear) payload data[] to a 64bit boundary
  allows the user to define their own structs and unions to easily access
  the CAN payload. There is no given byteorder on the CAN bus by
  default. A read(2) system call on a CAN_RAW socket transfers a
  struct can_frame to the user space.
@@ -258,10 +274,27 @@ PF_PACKET socket, that also binds to a specific interface:
  int can_ifindex;
  union {
  /* transport protocol class address info (e.g. ISOTP) */
  struct { canid_t rx_id, tx_id; } tp;

+/* J1939 address information */
+struct {
+/* 8 byte name when using dynamic addressing */
+__u64 name;
+
+/* pgn:
+ * 8 bit: PS in PDU2 case, else 0
+ * 8 bit: PF
+ * 1 bit: DP
+ * 1 bit: reserved
+ */
+__u32 pgn;
+
+/* 1 byte address */
+__u8 addr;
+} j1939;
+
  /* reserved for future CAN protocols address information */
  } can_addr;
  };


This looks like some J1939 code. Did you mix your patches?



This belongs to "update data structures" in the commit message ;-)


  To determine the interface index an appropriate ioctl() has 

Re: [PATCH v2 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter

2020-11-09 Thread Rafael J. Wysocki
On Mon, Nov 9, 2020 at 4:50 PM Ulf Hansson  wrote:
>
> On Mon, 9 Nov 2020 at 16:20, Rafael J. Wysocki  wrote:
> >
> > On Mon, Nov 9, 2020 at 4:00 PM Zhang Qilong  wrote:
> > >
> > > In many case, we need to check return value of pm_runtime_get_sync, but
> > > it brings a trouble to the usage counter processing. Many callers forget
> > > to decrease the usage counter when it failed. It has been discussed a
> > > lot[0][1]. So we add a function to deal with the usage counter for better
> > > coding.
> > >
> > > [0]https://lkml.org/lkml/2020/6/14/88
> > > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/20200520095148.10995-1-dinghao@zju.edu.cn/
> > > Signed-off-by: Zhang Qilong 
> > > ---
> > >  include/linux/pm_runtime.h | 30 ++
> > >  1 file changed, 30 insertions(+)
> > >
> > > diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
> > > index 4b708f4e8eed..6549ce764400 100644
> > > --- a/include/linux/pm_runtime.h
> > > +++ b/include/linux/pm_runtime.h
> > > @@ -386,6 +386,36 @@ static inline int pm_runtime_get_sync(struct device 
> > > *dev)
> > > return __pm_runtime_resume(dev, RPM_GET_PUT);
> > >  }
> > >
> > > +/**
> > > + * pm_runtime_general_get - Bump up usage counter of a device and resume 
> > > it.
> > > + * @dev: Target device.
> > > + *
> > > + * Increase runtime PM usage counter of @dev first, and carry out 
> > > runtime-resume
> > > + * of it synchronously. If __pm_runtime_resume return negative 
> > > value(device is in
> > > + * error state), we to need decrease the usage counter before it return. 
> > > If
> > > + * __pm_runtime_resume return positive value, it means the runtime of 
> > > device has
> > > + * already been in active state, and we let the new wrapper return zero 
> > > instead.
> > > + *
> > > + * The possible return values of this function is zero or negative value.
> > > + * zero:
> > > + *- it means resume succeeed or runtime of device has already been 
> > > active, the
> > > + *  runtime PM usage counter of @dev remains incremented.
> > > + * negative:
> > > + *- it means failure and the runtime PM usage counter of @dev has 
> > > been balanced.
> >
> > The kerneldoc above is kind of noisy and it is hard to figure out what
> > the helper really does from it.
> >
> > You could basically say something like "Resume @dev synchronously and
> > if that is successful, increment its runtime PM usage counter.  Return
> > 0 if the runtime PM usage counter of @dev has been incremented or a
> > negative error code otherwise."
> >
> > > + */
> > > +static inline int pm_runtime_general_get(struct device *dev)
> >
> > What about pm_runtime_resume_and_get()?
>
> We already have pm_runtime_get_if_active() - so perhaps
> pm_runtime_get_if_suspended() could be an option as well?

It doesn't work this way, though.

The "get" happens even if the device has not been suspended.


Re: [PATCH v2 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter

2020-11-09 Thread Rafael J. Wysocki
On Mon, Nov 9, 2020 at 4:50 PM zhangqilong  wrote:
>
> > operation to deal with usage counter
> >
> > On Mon, Nov 9, 2020 at 4:00 PM Zhang Qilong 
> > wrote:
> > >
> > > In many case, we need to check return value of pm_runtime_get_sync,
> > > but it brings a trouble to the usage counter processing. Many callers
> > > forget to decrease the usage counter when it failed. It has been
> > > discussed a lot[0][1]. So we add a function to deal with the usage
> > > counter for better coding.
> > >
> > > [0]https://lkml.org/lkml/2020/6/14/88
> > > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/202005200951
> > > 48.10995-1-dinghao@zju.edu.cn/
> > > Signed-off-by: Zhang Qilong 
> > > ---
> > >  include/linux/pm_runtime.h | 30 ++
> > >  1 file changed, 30 insertions(+)
> > >
> > > diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
> > > index 4b708f4e8eed..6549ce764400 100644
> > > --- a/include/linux/pm_runtime.h
> > > +++ b/include/linux/pm_runtime.h
> > > @@ -386,6 +386,36 @@ static inline int pm_runtime_get_sync(struct device
> > *dev)
> > > return __pm_runtime_resume(dev, RPM_GET_PUT);  }
> > >
> > > +/**
> > > + * pm_runtime_general_get - Bump up usage counter of a device and
> > resume it.
> > > + * @dev: Target device.
> > > + *
> > > + * Increase runtime PM usage counter of @dev first, and carry out
> > > +runtime-resume
> > > + * of it synchronously. If __pm_runtime_resume return negative
> > > +value(device is in
> > > + * error state), we to need decrease the usage counter before it
> > > +return. If
> > > + * __pm_runtime_resume return positive value, it means the runtime of
> > > +device has
> > > + * already been in active state, and we let the new wrapper return zero
> > instead.
> > > + *
> > > + * The possible return values of this function is zero or negative value.
> > > + * zero:
> > > + *- it means resume succeeed or runtime of device has already been
> > active, the
> > > + *  runtime PM usage counter of @dev remains incremented.
> > > + * negative:
> > > + *- it means failure and the runtime PM usage counter of @dev has
> > been balanced.
> >
> > The kerneldoc above is kind of noisy and it is hard to figure out what the 
> > helper
> > really does from it.
> >
> > You could basically say something like "Resume @dev synchronously and if 
> > that
> > is successful, increment its runtime PM usage counter.  Return
> > 0 if the runtime PM usage counter of @dev has been incremented or a negative
> > error code otherwise."
> >
>
> How about the following description.
> /**
> 390  * pm_runtime_general_get - Bump up usage counter of a device and resume 
> it.
> 391  * @dev: Target device.
> 392  *
> 393  * Increase runtime PM usage counter of @dev first, and carry out 
> runtime-resume
> 394  * of it synchronously. If __pm_runtime_resume return negative 
> value(device is in
> 395  * error state), we to need decrease the usage counter before it return. 
> If
> 396  * __pm_runtime_resume return positive value, it means the runtime of 
> device has
> 397  * already been in active state, and we let the new wrapper return zero 
> instead.
> 398  *

If you add the paragraph below, the one above becomes redundant IMV.

> 399  * Resume @dev synchronously and if that is successful, and increment its 
> runtime

"Resume @dev synchronously and if that is successful, increment its runtime"

(drop the extra "and").

> 400  * PM usage counter if it turn out to equal to 0. The runtime PM usage 
> counter of

The "if it turn out to equal to 0" phrase is redundant (and the
grammar in it is incorrect).

> 401  * @dev has been incremented or a negative error code otherwise.
> 402  */

Why don't you use what I said verbatim?


[PATCH RFC] SUNRPC: Use zero-copy to perform socket send operations

2020-11-09 Thread Chuck Lever
Daire Byrne reports a ~50% aggregate throughput regression on his
Linux NFS server after commit da1661b93bf4 ("SUNRPC: Teach server to
use xprt_sock_sendmsg for socket sends"), which replaced
kernel_send_page() calls in NFSD's socket send path with calls to
sock_sendmsg() using iov_iter.

Investigation showed that tcp_sendmsg() was not using zero-copy to
send the xdr_buf's bvec pages, but instead was relying on memcpy.

Set up the socket and each msghdr that bears bvec pages to use the
zero-copy mechanism in tcp_sendmsg.

Reported-by: Daire Byrne 
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=209439
Fixes: da1661b93bf4 ("SUNRPC: Teach server to use xprt_sock_sendmsg for socket sends")
Signed-off-by: Chuck Lever 
---
 net/sunrpc/socklib.c  |5 -
 net/sunrpc/svcsock.c  |1 +
 net/sunrpc/xprtsock.c |1 +
 3 files changed, 6 insertions(+), 1 deletion(-)

This patch does not fully resolve the issue. Daire reports high
softIRQ activity after the patch is applied, and this activity
seems to prevent full restoration of previous performance.


diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index d52313af82bc..af47596a7bdd 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -226,9 +226,12 @@ static int xprt_send_pagedata(struct socket *sock, struct msghdr *msg,
if (err < 0)
return err;
 
+   msg->msg_flags |= MSG_ZEROCOPY;
iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec, xdr_buf_pagecount(xdr),
  xdr->page_len + xdr->page_base);
-   return xprt_sendmsg(sock, msg, base + xdr->page_base);
+   err = xprt_sendmsg(sock, msg, base + xdr->page_base);
+   msg->msg_flags &= ~MSG_ZEROCOPY;
+   return err;
 }
 
 /* Common case:
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index c2752e2b9ce3..c814b4953b15 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1176,6 +1176,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
svsk->sk_datalen = 0;
memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
 
+   sock_set_flag(sk, SOCK_ZEROCOPY);
tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
 
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 7090bbee0ec5..343c6396b297 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2175,6 +2175,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 
/* socket options */
sock_reset_flag(sk, SOCK_LINGER);
+   sock_set_flag(sk, SOCK_ZEROCOPY);
tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
 
xprt_clear_connected(xprt);




Re: [PATCH v2 1/2] PM: runtime: Add a general runtime get sync operation to deal with usage counter

2020-11-09 Thread Ulf Hansson
On Mon, 9 Nov 2020 at 16:54, Rafael J. Wysocki  wrote:
>
> On Mon, Nov 9, 2020 at 4:50 PM Ulf Hansson  wrote:
> >
> > On Mon, 9 Nov 2020 at 16:20, Rafael J. Wysocki  wrote:
> > >
> > > On Mon, Nov 9, 2020 at 4:00 PM Zhang Qilong  
> > > wrote:
> > > >
> > > > In many case, we need to check return value of pm_runtime_get_sync, but
> > > > it brings a trouble to the usage counter processing. Many callers forget
> > > > to decrease the usage counter when it failed. It has been discussed a
> > > > lot[0][1]. So we add a function to deal with the usage counter for 
> > > > better
> > > > coding.
> > > >
> > > > [0]https://lkml.org/lkml/2020/6/14/88
> > > > [1]https://patchwork.ozlabs.org/project/linux-tegra/patch/20200520095148.10995-1-dinghao@zju.edu.cn/
> > > > Signed-off-by: Zhang Qilong 
> > > > ---
> > > >  include/linux/pm_runtime.h | 30 ++
> > > >  1 file changed, 30 insertions(+)
> > > >
> > > > diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
> > > > index 4b708f4e8eed..6549ce764400 100644
> > > > --- a/include/linux/pm_runtime.h
> > > > +++ b/include/linux/pm_runtime.h
> > > > @@ -386,6 +386,36 @@ static inline int pm_runtime_get_sync(struct 
> > > > device *dev)
> > > > return __pm_runtime_resume(dev, RPM_GET_PUT);
> > > >  }
> > > >
> > > > +/**
> > > > + * pm_runtime_general_get - Bump up usage counter of a device and 
> > > > resume it.
> > > > + * @dev: Target device.
> > > > + *
> > > > + * Increase runtime PM usage counter of @dev first, and carry out 
> > > > runtime-resume
> > > > + * of it synchronously. If __pm_runtime_resume return negative 
> > > > value(device is in
> > > > + * error state), we to need decrease the usage counter before it 
> > > > return. If
> > > > + * __pm_runtime_resume return positive value, it means the runtime of 
> > > > device has
> > > > + * already been in active state, and we let the new wrapper return 
> > > > zero instead.
> > > > + *
> > > > + * The possible return values of this function is zero or negative 
> > > > value.
> > > > + * zero:
> > > > + *- it means resume succeeed or runtime of device has already been 
> > > > active, the
> > > > + *  runtime PM usage counter of @dev remains incremented.
> > > > + * negative:
> > > > + *- it means failure and the runtime PM usage counter of @dev has 
> > > > been balanced.
> > >
> > > The kerneldoc above is kind of noisy and it is hard to figure out what
> > > the helper really does from it.
> > >
> > > You could basically say something like "Resume @dev synchronously and
> > > if that is successful, increment its runtime PM usage counter.  Return
> > > 0 if the runtime PM usage counter of @dev has been incremented or a
> > > negative error code otherwise."
> > >
> > > > + */
> > > > +static inline int pm_runtime_general_get(struct device *dev)
> > >
> > > What about pm_runtime_resume_and_get()?
> >
> > We already have pm_runtime_get_if_active() - so perhaps
> > pm_runtime_get_if_suspended() could be an option as well?
>
> It doesn't work this way, though.
>
> The "get" happens even if the device has not been suspended.

Yes, that's right - so pm_runtime_resume_and_get() is probably the
best we can pick then.

Kind regards
Uffe


  1   2   3   4   5   >