date:20250311

[PATCH v4 net-next 05/14] net: enetc: add debugfs interface to dump MAC filter

2025-03-11 Thread Wei Fang

ENETC's MAC filter consists of hash MAC filter and exact MAC filter. Hash
MAC filter is a 64-entry hash table consisting of two 32-bit registers.
Exact MAC filter is implemented by configuring MAC address filter table
through command BD ring. The table is stored in ENETC's internal memory
and needs to be read through command BD ring. In order to facilitate
debugging, add a debugfs interface to get the relevant information
about the MAC filter.

Signed-off-by: Wei Fang 
---
 drivers/net/ethernet/freescale/enetc/Makefile |  1 +
 drivers/net/ethernet/freescale/enetc/enetc.h  |  1 +
 .../ethernet/freescale/enetc/enetc4_debugfs.c | 93 +++
 .../ethernet/freescale/enetc/enetc4_debugfs.h | 20 
 .../net/ethernet/freescale/enetc/enetc4_pf.c  |  4 +
 5 files changed, 119 insertions(+)
 create mode 100644 drivers/net/ethernet/freescale/enetc/enetc4_debugfs.c
 create mode 100644 drivers/net/ethernet/freescale/enetc/enetc4_debugfs.h

diff --git a/drivers/net/ethernet/freescale/enetc/Makefile 
b/drivers/net/ethernet/freescale/enetc/Makefile
index 707a68e26971..f1c5ad45fd76 100644
--- a/drivers/net/ethernet/freescale/enetc/Makefile
+++ b/drivers/net/ethernet/freescale/enetc/Makefile
@@ -16,6 +16,7 @@ fsl-enetc-$(CONFIG_FSL_ENETC_QOS) += enetc_qos.o
 
 obj-$(CONFIG_NXP_ENETC4) += nxp-enetc4.o
 nxp-enetc4-y := enetc4_pf.o
+nxp-enetc4-$(CONFIG_DEBUG_FS) += enetc4_debugfs.o
 
 obj-$(CONFIG_FSL_ENETC_VF) += fsl-enetc-vf.o
 fsl-enetc-vf-y := enetc_vf.o
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h 
b/drivers/net/ethernet/freescale/enetc/enetc.h
index 4dba91408e3d..ca1bc85c0ac9 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -318,6 +318,7 @@ struct enetc_si {
struct enetc_mac_filter mac_filter[MADDR_TYPE];
struct workqueue_struct *workqueue;
struct work_struct rx_mode_task;
+   struct dentry *debugfs_root;
 };
 
 #define ENETC_SI_ALIGN 32
diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_debugfs.c 
b/drivers/net/ethernet/freescale/enetc/enetc4_debugfs.c
new file mode 100644
index ..3a660c80344a
--- /dev/null
+++ b/drivers/net/ethernet/freescale/enetc/enetc4_debugfs.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright 2025 NXP */
+
+#include 
+#include 
+#include 
+
+#include "enetc_pf.h"
+#include "enetc4_debugfs.h"
+
+#define is_en(x)   (x) ? "Enabled" : "Disabled"
+
+static void enetc_show_si_mac_hash_filter(struct seq_file *s, int i)
+{
+   struct enetc_si *si = s->private;
+   struct enetc_hw *hw = &si->hw;
+   u32 hash_h, hash_l;
+
+   hash_l = enetc_port_rd(hw, ENETC4_PSIUMHFR0(i));
+   hash_h = enetc_port_rd(hw, ENETC4_PSIUMHFR1(i));
+   seq_printf(s, "SI %d unicast MAC hash filter: 0x%08x%08x\n",
+  i, hash_h, hash_l);
+
+   hash_l = enetc_port_rd(hw, ENETC4_PSIMMHFR0(i));
+   hash_h = enetc_port_rd(hw, ENETC4_PSIMMHFR1(i));
+   seq_printf(s, "SI %d multicast MAC hash filter: 0x%08x%08x\n",
+  i, hash_h, hash_l);
+}
+
+static int enetc_mac_filter_show(struct seq_file *s, void *data)
+{
+   struct maft_entry_data maft_data;
+   struct enetc_si *si = s->private;
+   struct enetc_hw *hw = &si->hw;
+   struct maft_keye_data *keye;
+   struct enetc_pf *pf;
+   int i, err, num_si;
+   u32 val;
+
+   pf = enetc_si_priv(si);
+   num_si = pf->caps.num_vsi + 1;
+
+   val = enetc_port_rd(hw, ENETC4_PSIPMMR);
+   for (i = 0; i < num_si; i++) {
+   seq_printf(s, "SI %d Unicast Promiscuous mode: %s\n",
+  i, is_en(PSIPMMR_SI_MAC_UP(i) & val));
+   seq_printf(s, "SI %d Multicast Promiscuous mode: %s\n",
+  i, is_en(PSIPMMR_SI_MAC_MP(i) & val));
+   }
+
+   /* MAC hash filter table */
+   for (i = 0; i < num_si; i++)
+   enetc_show_si_mac_hash_filter(s, i);
+
+   if (!pf->num_mfe)
+   return 0;
+
+   /* MAC address filter table */
+   seq_puts(s, "Show MAC address filter table\n");
+   for (i = 0; i < pf->num_mfe; i++) {
+   memset(&maft_data, 0, sizeof(maft_data));
+   err = ntmp_maft_query_entry(&si->ntmp.cbdrs, i, &maft_data);
+   if (err)
+   return err;
+
+   keye = &maft_data.keye;
+   seq_printf(s, "Entry %d, MAC: %pM, SI bitmap: 0x%04x\n", i,
+  keye->mac_addr, 
le16_to_cpu(maft_data.cfge.si_bitmap));
+   }
+
+   return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(enetc_mac_filter);
+
+void enetc_create_debugfs(struct enetc_si *si)
+{
+   struct net_device *ndev = si->ndev;
+   struct dentry *root;
+
+   root = debugfs_create_dir(netdev_name(ndev), NULL);
+   if (IS_ERR(root))
+   return;
+
+   si->debugfs_root = root;
+
+   debugfs_create_file("mac_filter", 0444, root, si, 
&enetc_mac_filter_fops);
+}
+
+vo

[PATCH v4 net-next 12/14] net: enetc: add VLAN filtering support for i.MX95 ENETC PF

2025-03-11 Thread Wei Fang

Add VLAN hash filter support for i.MX95 ENETC PF. If VLAN filtering is
disabled, then VLAN promiscuous mode will be enabled, which means that
PF qualifies for reception of all VLAN tags.

Signed-off-by: Wei Fang 
---
 .../net/ethernet/freescale/enetc/enetc4_hw.h  |  4 
 .../net/ethernet/freescale/enetc/enetc4_pf.c  | 20 +++
 .../freescale/enetc/enetc_pf_common.c |  2 +-
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h 
b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h
index 826359004850..aa25b445d301 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h
@@ -107,6 +107,10 @@
 #define ENETC4_PSIMMHFR0(a)((a) * 0x80 + 0x2058)
 #define ENETC4_PSIMMHFR1(a)((a) * 0x80 + 0x205c)
 
+/* Port station interface a VLAN hash filter register 0/1 */
+#define ENETC4_PSIVHFR0(a) ((a) * 0x80 + 0x2060)
+#define ENETC4_PSIVHFR1(a) ((a) * 0x80 + 0x2064)
+
 #define ENETC4_PMCAPR  0x4004
 #define  PMCAPR_HD BIT(8)
 #define  PMCAPR_FP GENMASK(10, 9)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c 
b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c
index 53dbd5d71859..4aef00ff 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c
@@ -101,6 +101,13 @@ static void enetc4_pf_set_si_mac_hash_filter(struct 
enetc_hw *hw, int si,
}
 }
 
+static void enetc4_pf_set_si_vlan_hash_filter(struct enetc_hw *hw,
+ int si, u64 hash)
+{
+   enetc_port_wr(hw, ENETC4_PSIVHFR0(si), lower_32_bits(hash));
+   enetc_port_wr(hw, ENETC4_PSIVHFR1(si), upper_32_bits(hash));
+}
+
 static void enetc4_pf_destroy_mac_list(struct enetc_pf *pf)
 {
struct enetc_mac_list_entry *entry;
@@ -407,6 +414,7 @@ static void enetc4_pf_set_mac_filter(struct enetc_pf *pf, 
int type)
 static const struct enetc_pf_ops enetc4_pf_ops = {
.set_si_primary_mac = enetc4_pf_set_si_primary_mac,
.get_si_primary_mac = enetc4_pf_get_si_primary_mac,
+   .set_si_vlan_hash_filter = enetc4_pf_set_si_vlan_hash_filter,
 };
 
 static int enetc4_pf_struct_init(struct enetc_si *si)
@@ -696,6 +704,16 @@ static void enetc4_pf_set_rx_mode(struct net_device *ndev)
 static int enetc4_pf_set_features(struct net_device *ndev,
  netdev_features_t features)
 {
+   netdev_features_t changed = ndev->features ^ features;
+   struct enetc_ndev_priv *priv = netdev_priv(ndev);
+   struct enetc_hw *hw = &priv->si->hw;
+
+   if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) {
+   bool promisc_en = !(features & NETIF_F_HW_VLAN_CTAG_FILTER);
+
+   enetc4_pf_set_si_vlan_promisc(hw, 0, promisc_en);
+   }
+
enetc_set_features(ndev, features);
 
return 0;
@@ -709,6 +727,8 @@ static const struct net_device_ops enetc4_ndev_ops = {
.ndo_set_mac_address= enetc_pf_set_mac_addr,
.ndo_set_rx_mode= enetc4_pf_set_rx_mode,
.ndo_set_features   = enetc4_pf_set_features,
+   .ndo_vlan_rx_add_vid= enetc_vlan_rx_add_vid,
+   .ndo_vlan_rx_kill_vid   = enetc_vlan_rx_del_vid,
 };
 
 static struct phylink_pcs *
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c 
b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c
index 9f812c1af7a3..3f7ccc482301 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c
@@ -135,7 +135,7 @@ void enetc_pf_netdev_setup(struct enetc_si *si, struct 
net_device *ndev,
 
/* TODO: currently, i.MX95 ENETC driver does not support advanced 
features */
if (!is_enetc_rev1(si)) {
-   ndev->hw_features &= ~(NETIF_F_HW_VLAN_CTAG_FILTER | 
NETIF_F_LOOPBACK);
+   ndev->hw_features &= ~NETIF_F_LOOPBACK;
goto end;
}
 
-- 
2.34.1

[PATCH v4 net-next 04/14] net: enetc: add MAC filter for i.MX95 ENETC PF

2025-03-11 Thread Wei Fang

The i.MX95 ENETC supports both MAC hash filter and MAC exact filter. MAC
hash filter is implemented through a 64-bit hash table to match against
the hashed addresses. PF and VFs each have two MAC hash tables: one is
for unicast and the other one is for multicast. But MAC exact filter is
shared between SIs (PF and VFs), each table entry contains a MAC address
that may be unicast or multicast and the entry also contains an SI bitmap
field that indicates for which SIs the entry is valid.

For i.MX95 ENETC, MAC exact filter only has 4 entries. According to the
observation of the system default network configuration, the MAC filter
will be configured with multiple multicast addresses, so MAC exact filter
does not have enough entries to implement multicast filtering. Therefore,
the current MAC exact filter is only used for unicast filtering. If the
number of unicast addresses exceeds 4, then MAC hash filter is used.

Note that both MAC hash filter and MAC exact filter can only be accessed
by PF, VFs can notify PF to set its corresponding MAC filter through the
mailbox mechanism of ENETC. But currently MAC filter is only added for
i.MX95 ENETC PF. MAC filter support for ENETC VFs will be added in
subsequent patches.

Signed-off-by: Wei Fang 
---
 drivers/net/ethernet/freescale/enetc/enetc.h  |   2 +
 .../net/ethernet/freescale/enetc/enetc4_hw.h  |   8 +
 .../net/ethernet/freescale/enetc/enetc4_pf.c  | 422 +-
 .../net/ethernet/freescale/enetc/enetc_hw.h   |   6 +
 .../net/ethernet/freescale/enetc/enetc_pf.h   |  11 +
 5 files changed, 448 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h 
b/drivers/net/ethernet/freescale/enetc/enetc.h
index 9380d3e8ca01..4dba91408e3d 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -316,6 +316,8 @@ struct enetc_si {
const struct enetc_si_ops *ops;
 
struct enetc_mac_filter mac_filter[MADDR_TYPE];
+   struct workqueue_struct *workqueue;
+   struct work_struct rx_mode_task;
 };
 
 #define ENETC_SI_ALIGN 32
diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h 
b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h
index 695cb07c74bc..826359004850 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h
@@ -99,6 +99,14 @@
 #define ENETC4_PSICFGR2(a) ((a) * 0x80 + 0x2018)
 #define  PSICFGR2_NUM_MSIX GENMASK(5, 0)
 
+/* Port station interface a unicast MAC hash filter register 0/1 */
+#define ENETC4_PSIUMHFR0(a)((a) * 0x80 + 0x2050)
+#define ENETC4_PSIUMHFR1(a)((a) * 0x80 + 0x2054)
+
+/* Port station interface a multicast MAC hash filter register 0/1 */
+#define ENETC4_PSIMMHFR0(a)((a) * 0x80 + 0x2058)
+#define ENETC4_PSIMMHFR1(a)((a) * 0x80 + 0x205c)
+
 #define ENETC4_PMCAPR  0x4004
 #define  PMCAPR_HD BIT(8)
 #define  PMCAPR_FP GENMASK(10, 9)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c 
b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c
index 63001379f0a0..7d1c545f3f56 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c
@@ -11,6 +11,15 @@
 
 #define ENETC_SI_MAX_RING_NUM  8
 
+#define ENETC_MAC_FILTER_TYPE_UC   BIT(0)
+#define ENETC_MAC_FILTER_TYPE_MC   BIT(1)
+#define ENETC_MAC_FILTER_TYPE_ALL  (ENETC_MAC_FILTER_TYPE_UC | \
+ENETC_MAC_FILTER_TYPE_MC)
+
+struct enetc_mac_addr {
+   u8 addr[ETH_ALEN];
+};
+
 static void enetc4_get_port_caps(struct enetc_pf *pf)
 {
struct enetc_hw *hw = &pf->si->hw;
@@ -26,6 +35,9 @@ static void enetc4_get_port_caps(struct enetc_pf *pf)
 
val = enetc_port_rd(hw, ENETC4_PMCAPR);
pf->caps.half_duplex = (val & PMCAPR_HD) ? 1 : 0;
+
+   val = enetc_port_rd(hw, ENETC4_PSIMAFCAPR);
+   pf->caps.mac_filter_num = val & PSIMAFCAPR_NUM_MAC_AFTE;
 }
 
 static void enetc4_pf_set_si_primary_mac(struct enetc_hw *hw, int si,
@@ -56,6 +68,341 @@ static void enetc4_pf_get_si_primary_mac(struct enetc_hw 
*hw, int si,
put_unaligned_le16(lower, addr + 4);
 }
 
+static void enetc4_pf_set_si_mac_promisc(struct enetc_hw *hw, int si,
+int type, bool en)
+{
+   u32 val = enetc_port_rd(hw, ENETC4_PSIPMMR);
+
+   if (type == UC) {
+   if (en)
+   val |= PSIPMMR_SI_MAC_UP(si);
+   else
+   val &= ~PSIPMMR_SI_MAC_UP(si);
+   } else { /* Multicast promiscuous mode. */
+   if (en)
+   val |= PSIPMMR_SI_MAC_MP(si);
+   else
+   val &= ~PSIPMMR_SI_MAC_MP(si);
+   }
+
+   enetc_port_wr(hw, ENETC4_PSIPMMR, val);
+}
+
+static void enetc4_pf_set_si_mac_hash_filter(struct enetc_hw *hw, int si,
+

Re: [PATCH v7 1/7] powerpc/pseries: Define common functions for RTAS sequence calls

2025-03-11 Thread kernel test robot

Hi Haren,

kernel test robot noticed the following build warnings:

[auto build test WARNING on powerpc/next]
[also build test WARNING on powerpc/fixes linus/master v6.14-rc6]
[cannot apply to next-20250307]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:
https://github.com/intel-lab-lkp/linux/commits/Haren-Myneni/powerpc-pseries-Define-common-functions-for-RTAS-sequence-calls/20250310-054319
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
patch link:
https://lore.kernel.org/r/20250309213916.762116-2-haren%40linux.ibm.com
patch subject: [PATCH v7 1/7] powerpc/pseries: Define common functions for RTAS 
sequence calls
config: powerpc64-randconfig-r072-20250311 
(https://download.01.org/0day-ci/archive/20250311/202503111557.y6cdjlzi-...@intel.com/config)
compiler: clang version 21.0.0git (https://github.com/llvm/llvm-project 
e15545cad8297ec7555f26e5ae74a9f0511203e7)
reproduce (this is a W=1 build): 
(https://download.01.org/0day-ci/archive/20250311/202503111557.y6cdjlzi-...@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot 
| Closes: 
https://lore.kernel.org/oe-kbuild-all/202503111557.y6cdjlzi-...@intel.com/

All warnings (new ones prefixed by >>):

>> arch/powerpc/platforms/pseries/papr-vpd.c:127: warning: Function parameter 
>> or struct member 'seq' not described in 'vpd_sequence_begin'
--
>> arch/powerpc/platforms/pseries/papr-rtas-common.c:171: warning: Excess 
>> function parameter 'param' description in 'papr_rtas_retrieve'
>> arch/powerpc/platforms/pseries/papr-rtas-common.c:208: warning: Excess 
>> function parameter 'param' description in 'papr_rtas_setup_file_interface'
>> arch/powerpc/platforms/pseries/papr-rtas-common.c:255: warning: Function 
>> parameter or struct member 'status' not described in 
>> 'papr_rtas_sequence_should_stop'
>> arch/powerpc/platforms/pseries/papr-rtas-common.c:255: warning: Function 
>> parameter or struct member 'init_state' not described in 
>> 'papr_rtas_sequence_should_stop'


vim +127 arch/powerpc/platforms/pseries/papr-vpd.c

514f6ff4369a30b Nathan Lynch 2023-12-12  113  
514f6ff4369a30b Nathan Lynch 2023-12-12  114  /*
514f6ff4369a30b Nathan Lynch 2023-12-12  115   * Internal VPD sequence APIs. A 
VPD sequence is a series of calls to
514f6ff4369a30b Nathan Lynch 2023-12-12  116   * ibm,get-vpd for a given 
location code. The sequence ends when an
514f6ff4369a30b Nathan Lynch 2023-12-12  117   * error is encountered or all 
VPD for the location code has been
514f6ff4369a30b Nathan Lynch 2023-12-12  118   * returned.
514f6ff4369a30b Nathan Lynch 2023-12-12  119   */
514f6ff4369a30b Nathan Lynch 2023-12-12  120  
514f6ff4369a30b Nathan Lynch 2023-12-12  121  /**
514f6ff4369a30b Nathan Lynch 2023-12-12  122   * vpd_sequence_begin() - Begin a 
VPD retrieval sequence.
514f6ff4369a30b Nathan Lynch 2023-12-12  123   *
514f6ff4369a30b Nathan Lynch 2023-12-12  124   * Context: May sleep.
514f6ff4369a30b Nathan Lynch 2023-12-12  125   */
c657e2672fb8b67 Haren Myneni 2025-03-09  126  static void 
vpd_sequence_begin(struct papr_rtas_sequence *seq)
514f6ff4369a30b Nathan Lynch 2023-12-12 @127  {
c657e2672fb8b67 Haren Myneni 2025-03-09  128struct rtas_ibm_get_vpd_params 
*vpd_params;
514f6ff4369a30b Nathan Lynch 2023-12-12  129/*
514f6ff4369a30b Nathan Lynch 2023-12-12  130 * Use a static data structure 
for the location code passed to
514f6ff4369a30b Nathan Lynch 2023-12-12  131 * RTAS to ensure it's in the 
RMA and avoid a separate work
514f6ff4369a30b Nathan Lynch 2023-12-12  132 * area allocation. Guarded by 
the function lock.
514f6ff4369a30b Nathan Lynch 2023-12-12  133 */
514f6ff4369a30b Nathan Lynch 2023-12-12  134static struct 
papr_location_code static_loc_code;
514f6ff4369a30b Nathan Lynch 2023-12-12  135  
c657e2672fb8b67 Haren Myneni 2025-03-09  136vpd_params =  (struct 
rtas_ibm_get_vpd_params *)seq->params;
514f6ff4369a30b Nathan Lynch 2023-12-12  137/*
514f6ff4369a30b Nathan Lynch 2023-12-12  138 * We could allocate the work 
area before acquiring the
514f6ff4369a30b Nathan Lynch 2023-12-12  139 * function lock, but that 
would allow concurrent requests to
514f6ff4369a30b Nathan Lynch 2023-12-12  140 * exhaust the limited work 
area pool for no benefit. So
514f6ff4369a30b Nathan Lynch 2023-12-12  141 * allocate the work area under 
the lock.
514f6ff4369a30b Nathan Lynch 2023-12-12  142 */
514f6ff4369a30b Nathan Lynch 2023-12-12  143
mutex_lock(&rtas_ibm_get

[PATCH v12 1/4] cpu/SMT: Provide a default topology_is_primary_thread()

2025-03-11 Thread Yicong Yang

From: Yicong Yang 

Currently if architectures want to support HOTPLUG_SMT they need to
provide a topology_is_primary_thread() telling the framework which
thread in the SMT cannot be taken offline. However, arm64 doesn't have
a restriction on which thread in the SMT cannot be taken offline, so
the simplest choice is to just treat the 1st thread as the "primary"
thread. So make this the default implementation in the framework and
let architectures like x86 that have a special primary thread
override this function (which they've already done).

There's no need to provide a stub function if !CONFIG_SMP or
!CONFIG_HOTPLUG_SMT. In such cases the CPU being tested is already
the 1st CPU in the SMT, so it's always the primary thread.

Reviewed-by: Jonathan Cameron 
Reviewed-by: Pierre Gondois 
Reviewed-by: Dietmar Eggemann 
Signed-off-by: Yicong Yang 
---
Pre questioned in v9 [1] whether this works on architectures not using
CONFIG_GENERIC_ARCH_TOPOLOGY, See [2] for demonstration hacking on LoongArch
VM and this also works. Architectures should use this on their own situation.
[1] 
https://lore.kernel.org/linux-arm-kernel/427bd639-33c3-47e4-9e83-68c428eb1...@arm.com/
[2] 
https://lore.kernel.org/linux-arm-kernel/a5690fee-3019-f26c-8bad-1d95e388e...@huawei.com/

 arch/powerpc/include/asm/topology.h |  1 +
 arch/x86/include/asm/topology.h |  2 +-
 include/linux/topology.h| 24 
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/topology.h 
b/arch/powerpc/include/asm/topology.h
index 16bacfe8c7a2..da15b5efe807 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -152,6 +152,7 @@ static inline bool topology_is_primary_thread(unsigned int 
cpu)
 {
return cpu == cpu_first_thread_sibling(cpu);
 }
+#define topology_is_primary_thread topology_is_primary_thread
 
 static inline bool topology_smt_thread_allowed(unsigned int cpu)
 {
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index ec134b719144..6c79ee7c0957 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -229,11 +229,11 @@ static inline bool topology_is_primary_thread(unsigned 
int cpu)
 {
return cpumask_test_cpu(cpu, cpu_primary_thread_mask);
 }
+#define topology_is_primary_thread topology_is_primary_thread
 
 #else /* CONFIG_SMP */
 static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
 static inline int topology_max_smt_threads(void) { return 1; }
-static inline bool topology_is_primary_thread(unsigned int cpu) { return true; 
}
 static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
 #endif /* !CONFIG_SMP */
 
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 52f5850730b3..6ae995e18ff5 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -240,6 +240,30 @@ static inline const struct cpumask *cpu_smt_mask(int cpu)
 }
 #endif
 
+#ifndef topology_is_primary_thread
+
+#define topology_is_primary_thread topology_is_primary_thread
+
+static inline bool topology_is_primary_thread(unsigned int cpu)
+{
+   /*
+* When disabling SMT the primary thread of the SMT will remain
+* enabled/active. Architectures do have a special primary thread
+* (e.g. x86) needs to override this function. Otherwise can make
+* the first thread in the SMT as the primary thread.
+*
+* The sibling cpumask of an offline CPU contains always the CPU
+* itself for architectures using the implementation of
+* CONFIG_GENERIC_ARCH_TOPOLOGY for building their topology.
+* Other architectures not using CONFIG_GENERIC_ARCH_TOPOLOGY for
+* building their topology have to check whether to use this default
+* implementation or to override it.
+*/
+   return cpu == cpumask_first(topology_sibling_cpumask(cpu));
+}
+
+#endif
+
 static inline const struct cpumask *cpu_cpu_mask(int cpu)
 {
return cpumask_of_node(cpu_to_node(cpu));
-- 
2.24.0

Re: [linux-next-20250307] Build Failure

2025-03-11 Thread Madhavan Srinivasan




On 3/9/25 6:08 PM, Venkat Rao Bagalkote wrote:
> Greetings!!,
> 
> I see linux-next-20250307 fails to build on IBM Power9 and Power10 servers.
> 
> 
> Errors:
> 
> In file included from ^[[01m^[[K^[[m^[[K:
> ^[[01m^[[K./usr/include/cxl/features.h:11:10:^[[m^[[K ^[[01;31m^[[Kfatal 
> error: ^[[m^[[Kuuid/uuid.h: No such file or directory
>    11 | #include ^[[01;31m^[[K^[[m^[[K
>   |  ^[[01;31m^[[K^^[[m^[[K
> compilation terminated.
> make[4]: *** [usr/include/Makefile:85: usr/include/cxl/features.hdrtest] 
> Error 1
> make[3]: *** [scripts/Makefile.build:461: usr/include] Error 2
> make[2]: *** [scripts/Makefile.build:461: usr] Error 2
> make[2]: *** Waiting for unfinished jobs
> arch/powerpc/kernel/switch.o: warning: objtool: .text+0x4: 
> intra_function_call not a direct call
> arch/powerpc/crypto/ghashp8-ppc.o: warning: objtool: .text+0x22c: unannotated 
> intra-function call
> arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0xe84: 
> intra_function_call not a direct call
> make[1]: *** [/home/linux_src/linux/Makefile:1997: .] Error 2
> make: *** [Makefile:251: __sub-make] Error 2
> 
> Please add below tag, if you happen to fix this issue.
> 

Fixes have been posted to handle these errors:

https://patchwork.ozlabs.org/project/linuxppc-dev/patch/88876fb4e412203452e57d1037a1341cf15ccc7b.1741128981.git.christophe.le...@csgroup.eu/
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/7aa7eb73fe6bc95ac210510e22394ca0ae227b69.1741128786.git.christophe.le...@csgroup.eu/

and these are already part of powerpc/next-test, will be moved to powerpc/next 
soon

Maddy

> Reported-by: Venkat Rao Bagalkote 
> 
> 
> Regards,
> 
> Venkat.
> 
>

Re: [PATCH v7 3/7] powerpc/pseries: Add papr-indices char driver for ibm,get-indices

2025-03-11 Thread kernel test robot

Hi Haren,

kernel test robot noticed the following build warnings:

[auto build test WARNING on powerpc/next]
[also build test WARNING on powerpc/fixes linus/master v6.14-rc6]
[cannot apply to next-20250307]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:
https://github.com/intel-lab-lkp/linux/commits/Haren-Myneni/powerpc-pseries-Define-common-functions-for-RTAS-sequence-calls/20250310-054319
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
patch link:
https://lore.kernel.org/r/20250309213916.762116-4-haren%40linux.ibm.com
patch subject: [PATCH v7 3/7] powerpc/pseries: Add papr-indices char driver for 
ibm,get-indices
config: powerpc64-randconfig-r072-20250311 
(https://download.01.org/0day-ci/archive/20250311/202503111710.tspk3snj-...@intel.com/config)
compiler: clang version 21.0.0git (https://github.com/llvm/llvm-project 
e15545cad8297ec7555f26e5ae74a9f0511203e7)
reproduce (this is a W=1 build): 
(https://download.01.org/0day-ci/archive/20250311/202503111710.tspk3snj-...@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot 
| Closes: 
https://lore.kernel.org/oe-kbuild-all/202503111710.tspk3snj-...@intel.com/

All warnings (new ones prefixed by >>):

>> arch/powerpc/platforms/pseries/papr-indices.c:124: warning: Function 
>> parameter or struct member 'seq' not described in 'indices_sequence_begin'


vim +124 arch/powerpc/platforms/pseries/papr-indices.c

   110  
   111  /*
   112   * Internal indices sequence APIs. A sequence is a series of calls to
   113   * ibm,get-indices for a given location code. The sequence ends when
   114   * an error is encountered or all indices for the input has been
   115   * returned.
   116   */
   117  
   118  /**
   119   * indices_sequence_begin() - Begin a indices retrieval sequence.
   120   *
   121   * Context: May sleep.
   122   */
   123  static void indices_sequence_begin(struct papr_rtas_sequence *seq)
 > 124  {
   125  struct rtas_get_indices_params  *param;
   126  
   127  param = (struct rtas_get_indices_params *)seq->params;
   128  /*
   129   * We could allocate the work area before acquiring the
   130   * function lock, but that would allow concurrent requests to
   131   * exhaust the limited work area pool for no benefit. So
   132   * allocate the work area under the lock.
   133   */
   134  mutex_lock(&rtas_ibm_get_indices_lock);
   135  param->work_area = 
rtas_work_area_alloc(RTAS_GET_INDICES_BUF_SIZE);
   136  param->next = 1;
   137  param->status = 0;
   138  }
   139  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

Re: [linux-next-20250307] Build Failure

2025-03-11 Thread Madhavan Srinivasan




On 3/11/25 2:17 PM, Madhavan Srinivasan wrote:
> 
> 
> On 3/9/25 6:08 PM, Venkat Rao Bagalkote wrote:
>> Greetings!!,
>>
>> I see linux-next-20250307 fails to build on IBM Power9 and Power10 servers.
>>
>>
>> Errors:
>>
>> In file included from ^[[01m^[[K^[[m^[[K:
>> ^[[01m^[[K./usr/include/cxl/features.h:11:10:^[[m^[[K ^[[01;31m^[[Kfatal 
>> error: ^[[m^[[Kuuid/uuid.h: No such file or directory
>>    11 | #include ^[[01;31m^[[K^[[m^[[K
>>   |  ^[[01;31m^[[K^^[[m^[[K
>> compilation terminated.

This is a new report and needs to be debugged.
The fixes I was pointing out are for the objtool errors.

Maddy


>> make[4]: *** [usr/include/Makefile:85: usr/include/cxl/features.hdrtest] 
>> Error 1
>> make[3]: *** [scripts/Makefile.build:461: usr/include] Error 2
>> make[2]: *** [scripts/Makefile.build:461: usr] Error 2
>> make[2]: *** Waiting for unfinished jobs
>> arch/powerpc/kernel/switch.o: warning: objtool: .text+0x4: 
>> intra_function_call not a direct call
>> arch/powerpc/crypto/ghashp8-ppc.o: warning: objtool: .text+0x22c: 
>> unannotated intra-function call
>> arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0xe84: 
>> intra_function_call not a direct call
>> make[1]: *** [/home/linux_src/linux/Makefile:1997: .] Error 2
>> make: *** [Makefile:251: __sub-make] Error 2
>>
>> Please add below tag, if you happen to fix this issue.
>>
> 
> Fixes has been posted to handle these errors 
> 
> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/88876fb4e412203452e57d1037a1341cf15ccc7b.1741128981.git.christophe.le...@csgroup.eu/
> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/7aa7eb73fe6bc95ac210510e22394ca0ae227b69.1741128786.git.christophe.le...@csgroup.eu/
> 
> and these are already part of powerpc/next-test, will be moved to 
> powerpc/next soon
> 
> Maddy
> 
>> Reported-by: Venkat Rao Bagalkote 
>>
>>
>> Regards,
>>
>> Venkat.
>>
>>
> 
>

[PATCH v4 net-next 13/14] net: enetc: add loopback support for i.MX95 ENETC PF

2025-03-11 Thread Wei Fang

Add internal loopback support for i.MX95 ENETC PF. The default loopback
mode is MAC level loopback, where the MAC Tx data is looped back onto the Rx.
The MAC interface runs at a fixed 1:8 ratio of NETC clock in MAC-level
loopback mode, with no dependency on Tx clock.

Signed-off-by: Wei Fang 
---
 .../net/ethernet/freescale/enetc/enetc4_pf.c   | 18 ++
 .../ethernet/freescale/enetc/enetc_pf_common.c |  4 +---
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c 
b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c
index 4aef00ff..c169900ccb4a 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c
@@ -108,6 +108,21 @@ static void enetc4_pf_set_si_vlan_hash_filter(struct 
enetc_hw *hw,
enetc_port_wr(hw, ENETC4_PSIVHFR1(si), upper_32_bits(hash));
 }
 
+static void enetc4_pf_set_loopback(struct net_device *ndev, bool en)
+{
+   struct enetc_ndev_priv *priv = netdev_priv(ndev);
+   struct enetc_si *si = priv->si;
+   u32 val;
+
+   val = enetc_port_mac_rd(si, ENETC4_PM_CMD_CFG(0));
+   val = u32_replace_bits(val, en ? 1 : 0, PM_CMD_CFG_LOOP_EN);
+   /* Default to select MAC level loopback mode if loopback is enabled. */
+   val = u32_replace_bits(val, en ? LPBCK_MODE_MAC_LEVEL : 0,
+  PM_CMD_CFG_LPBK_MODE);
+
+   enetc_port_mac_wr(si, ENETC4_PM_CMD_CFG(0), val);
+}
+
 static void enetc4_pf_destroy_mac_list(struct enetc_pf *pf)
 {
struct enetc_mac_list_entry *entry;
@@ -714,6 +729,9 @@ static int enetc4_pf_set_features(struct net_device *ndev,
enetc4_pf_set_si_vlan_promisc(hw, 0, promisc_en);
}
 
+   if (changed & NETIF_F_LOOPBACK)
+   enetc4_pf_set_loopback(ndev, !!(features & NETIF_F_LOOPBACK));
+
enetc_set_features(ndev, features);
 
return 0;
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c 
b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c
index 3f7ccc482301..0a2b8769a175 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c
@@ -134,10 +134,8 @@ void enetc_pf_netdev_setup(struct enetc_si *si, struct 
net_device *ndev,
}
 
/* TODO: currently, i.MX95 ENETC driver does not support advanced 
features */
-   if (!is_enetc_rev1(si)) {
-   ndev->hw_features &= ~NETIF_F_LOOPBACK;
+   if (!is_enetc_rev1(si))
goto end;
-   }
 
ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
 NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG |
-- 
2.34.1

[PATCH v4 net-next 08/14] net: enetc: add RSS support for i.MX95 ENETC PF

2025-03-11 Thread Wei Fang

Compared with LS1028A, there are two main differences: first, i.MX95
ENETC uses NTMP 2.0 to manage the RSS table, and second, the offset
of the RSS Key registers is different. Some modifications have been
made in the previous patches based on these differences to ensure that
the relevant interfaces are compatible with i.MX95. So it's time to
add RSS support to i.MX95 ENETC PF.

Signed-off-by: Wei Fang 
---
 drivers/net/ethernet/freescale/enetc/enetc.c  |  5 +--
 drivers/net/ethernet/freescale/enetc/enetc.h  |  2 +
 .../net/ethernet/freescale/enetc/enetc4_pf.c  | 11 +
 .../net/ethernet/freescale/enetc/enetc_cbdr.c | 14 +++
 .../ethernet/freescale/enetc/enetc_ethtool.c  | 42 ---
 .../freescale/enetc/enetc_pf_common.c |  6 +--
 6 files changed, 67 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c 
b/drivers/net/ethernet/freescale/enetc/enetc.c
index 2a8fa455e96b..5b5e65ac8fab 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -2436,10 +2436,7 @@ int enetc_configure_si(struct enetc_ndev_priv *priv)
if (si->hw_features & ENETC_SI_F_LSO)
enetc_set_lso_flags_mask(hw);
 
-   /* TODO: RSS support for i.MX95 will be supported later, and the
-* is_enetc_rev1() condition will be removed
-*/
-   if (si->num_rss && is_enetc_rev1(si)) {
+   if (si->num_rss) {
err = enetc_setup_default_rss_table(si, priv->num_rx_rings);
if (err)
return err;
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h 
b/drivers/net/ethernet/freescale/enetc/enetc.h
index a3ce324c716c..ecf79338cd79 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -543,6 +543,8 @@ void enetc_set_rss_key(struct enetc_si *si, const u8 
*bytes);
 int enetc_get_rss_table(struct enetc_si *si, u32 *table, int count);
 int enetc_set_rss_table(struct enetc_si *si, const u32 *table, int count);
 int enetc_send_cmd(struct enetc_si *si, struct enetc_cbd *cbd);
+int enetc4_get_rss_table(struct enetc_si *si, u32 *table, int count);
+int enetc4_set_rss_table(struct enetc_si *si, const u32 *table, int count);
 
 static inline void *enetc_cbd_alloc_data_mem(struct enetc_si *si,
 struct enetc_cbd *cbd,
diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c 
b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c
index f991e1aae85c..53dbd5d71859 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c
@@ -693,6 +693,14 @@ static void enetc4_pf_set_rx_mode(struct net_device *ndev)
queue_work(si->workqueue, &si->rx_mode_task);
 }
 
+static int enetc4_pf_set_features(struct net_device *ndev,
+ netdev_features_t features)
+{
+   enetc_set_features(ndev, features);
+
+   return 0;
+}
+
 static const struct net_device_ops enetc4_ndev_ops = {
.ndo_open   = enetc_open,
.ndo_stop   = enetc_close,
@@ -700,6 +708,7 @@ static const struct net_device_ops enetc4_ndev_ops = {
.ndo_get_stats  = enetc_get_stats,
.ndo_set_mac_address= enetc_pf_set_mac_addr,
.ndo_set_rx_mode= enetc4_pf_set_rx_mode,
+   .ndo_set_features   = enetc4_pf_set_features,
 };
 
 static struct phylink_pcs *
@@ -1108,6 +1117,8 @@ static void enetc4_pf_netdev_destroy(struct enetc_si *si)
 static const struct enetc_si_ops enetc4_psi_ops = {
.setup_cbdr = enetc4_setup_cbdr,
.teardown_cbdr = enetc4_teardown_cbdr,
+   .get_rss_table = enetc4_get_rss_table,
+   .set_rss_table = enetc4_set_rss_table,
 };
 
 static int enetc4_pf_wq_task_init(struct enetc_si *si)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c 
b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
index 4e5125331d7b..1a74b93f1fd3 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
@@ -299,3 +299,17 @@ int enetc_set_rss_table(struct enetc_si *si, const u32 
*table, int count)
return enetc_cmd_rss_table(si, (u32 *)table, count, false);
 }
 EXPORT_SYMBOL_GPL(enetc_set_rss_table);
+
+int enetc4_get_rss_table(struct enetc_si *si, u32 *table, int count)
+{
+   return ntmp_rsst_query_or_update_entry(&si->ntmp.cbdrs,
+  table, count, true);
+}
+EXPORT_SYMBOL_GPL(enetc4_get_rss_table);
+
+int enetc4_set_rss_table(struct enetc_si *si, const u32 *table, int count)
+{
+   return ntmp_rsst_query_or_update_entry(&si->ntmp.cbdrs,
+  (u32 *)table, count, false);
+}
+EXPORT_SYMBOL_GPL(enetc4_set_rss_table);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c 
b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
index 1a8fae

[PATCH 5.4 059/328] module: Extend the preempt disabled section in dereference_symbol_descriptor().

2025-03-11 Thread Greg Kroah-Hartman

5.4-stable review patch.  If anyone has any objections, please let me know.

--

From: Sebastian Andrzej Siewior 

[ Upstream commit a145c848d69f9c6f32008d8319edaa133360dd74 ]

dereference_symbol_descriptor() needs to obtain the module pointer
belonging to pointer in order to resolve that pointer.
The returned mod pointer is obtained under RCU-sched/ preempt_disable()
guarantees and needs to be used within this section to ensure that the
module is not removed in the meantime.

Extend the preempt_disable() section to also cover
dereference_module_function_descriptor().

Fixes: 04b8eb7a4ccd9 ("symbol lookup: introduce 
dereference_symbol_descriptor()")
Cc: James E.J. Bottomley 
Cc: Christophe Leroy 
Cc: Helge Deller 
Cc: Madhavan Srinivasan 
Cc: Michael Ellerman 
Cc: Naveen N Rao 
Cc: Nicholas Piggin 
Cc: Sergey Senozhatsky 
Cc: linux-par...@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Reviewed-by: Sergey Senozhatsky 
Acked-by: Peter Zijlstra (Intel) 
Signed-off-by: Sebastian Andrzej Siewior 
Link: https://lore.kernel.org/r/20250108090457.512198-2-bige...@linutronix.de
Signed-off-by: Petr Pavlu 
Signed-off-by: Sasha Levin 
---
 include/linux/kallsyms.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 1f96ce2b47df1..d84b677c728a9 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -63,10 +63,10 @@ static inline void *dereference_symbol_descriptor(void *ptr)
 
preempt_disable();
mod = __module_address((unsigned long)ptr);
-   preempt_enable();
 
if (mod)
ptr = dereference_module_function_descriptor(mod, ptr);
+   preempt_enable();
 #endif
return ptr;
 }
-- 
2.39.5

Re: [PATCH] selftest/powerpc/mm/pkey: fix build-break introduced by commit 00894c3fc917

2025-03-11 Thread Catalin Marinas

Hi Madhavan,

On Tue, Mar 11, 2025 at 02:11:29PM +0530, Madhavan Srinivasan wrote:
> Build break was reported in the powerpc mailing list for next-20250218 with 
> below errors
> 
> make[1]: Nothing to be done for 'all'.
> BUILD_TARGET=/root/venkat/linux-next/tools/testing/selftests/powerpc/mm; 
> mkdir -p $BUILD_TARGET; make OUTPUT=$BUILD_TARGET -k -C mm all
>   CC   pkey_exec_prot
> In file included from pkey_exec_prot.c:18:
> /root/venkat/linux-next/tools/testing/selftests/powerpc/include/pkeys.h: In 
> function ‘pkeys_unsupported’:
> /root/venkat/linux-next/tools/testing/selftests/powerpc/include/pkeys.h:96:34:
>  error: ‘PKEY_UNRESTRICTED’ undeclared (first use in this function)
>96 | pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
>   |  ^
> 
> https://lore.kernel.org/all/20250113170619.484698-2-yury.khrusta...@arm.com/ 
> patchset
> has been queued to arm64/for-next/pkey_unrestricted which is causing a build 
> break
> in the selftest/powerpc builds.
> 
> Commit 6d61527d931ba ("mm/pkey: Add PKEY_UNRESTRICTED macro") added a macro
> PKEY_UNRESTRICTED to handle implicit literal value of 0x0 (which is 
> "unrestricted").
> Add the same to selftest/powerpc/pkeys.h to fix the reported build break.
> 
> Reported-by: Venkat Rao Bagalkote 
> Closes: 
> https://lore.kernel.org/lkml/3267ea6e-5a1a-4752-96ef-8351c912d...@linux.ibm.com/T/
> Tested-by: Venkat Rao Bagalkote 
> Signed-off-by: Madhavan Srinivasan 
> ---
> Catalin, can you take this fix via arm64/for-next/pkey_unrestricted?
> Patch applies clean on top of arm64/for-next/pkey_unrestricted

I'll take it, sorry for the break. I did not realise powerpc duplicates
those definitions.

-- 
Catalin

[PATCH v12 3/4] arm64: topology: Support SMT control on ACPI based system

2025-03-11 Thread Yicong Yang

From: Yicong Yang 

For ACPI we'll build the topology from PPTT and we cannot directly
get the SMT number of each core. Instead, use a temporary xarray
to record the heterogeneous information (from ACPI_PPTT_ACPI_IDENTICAL)
and SMT information of the first core in its heterogeneous CPU cluster
when building the topology. Then we can know the largest SMT number
in the system. If a homogeneous system's using ACPI 6.2 or later,
all the CPUs should be under the root node of PPTT. There'll be
only one entry in the xarray and all the CPUs in the system will
be assumed identical.

The framework's SMT control provides two interfaces to the users [1]
through /sys/devices/system/cpu/smt/control:
1) enable SMT by writing "on" and disable by "off"
2) enable SMT by writing max_thread_number or disable by writing 1

Both methods support completely disabling/enabling the SMT cores so both
work correctly for symmetric SMT platform and asymmetric platform with
non-SMT and one type SMT cores like:
core A: 1 thread
core B: X (X!=1) threads

Note that for a theoretically possible multiple SMT-X (X>1) core
platform the SMT control is also supported as expected but only
by writing the "on/off" method.

[1] 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/ABI/testing/sysfs-devices-system-cpu#n542
Reviewed-by: Jonathan Cameron 
Reviewed-by: Hanjun Guo 
Reviewed-by: Pierre Gondois 
Reviewed-by: Dietmar Eggemann 
Signed-off-by: Yicong Yang 
---
 arch/arm64/kernel/topology.c | 54 
 1 file changed, 54 insertions(+)

diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index cb180684d10d..0bcea4f89ea8 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -15,8 +15,10 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -37,17 +39,28 @@ static bool __init acpi_cpu_is_threaded(int cpu)
return !!is_threaded;
 }
 
+struct cpu_smt_info {
+   unsigned int thread_num;
+   int core_id;
+};
+
 /*
  * Propagate the topology information of the processor_topology_node tree to 
the
  * cpu_topology array.
  */
 int __init parse_acpi_topology(void)
 {
+   unsigned int max_smt_thread_num = 1;
+   struct cpu_smt_info *entry;
+   struct xarray hetero_cpu;
+   unsigned long hetero_id;
int cpu, topology_id;
 
if (acpi_disabled)
return 0;
 
+   xa_init(&hetero_cpu);
+
for_each_possible_cpu(cpu) {
topology_id = find_acpi_cpu_topology(cpu, 0);
if (topology_id < 0)
@@ -57,6 +70,34 @@ int __init parse_acpi_topology(void)
cpu_topology[cpu].thread_id = topology_id;
topology_id = find_acpi_cpu_topology(cpu, 1);
cpu_topology[cpu].core_id   = topology_id;
+
+   /*
+* In the PPTT, CPUs below a node with the 'identical
+* implementation' flag have the same number of threads.
+* Count the number of threads for only one CPU (i.e.
+* one core_id) among those with the same hetero_id.
+* See the comment of find_acpi_cpu_topology_hetero_id()
+* for more details.
+*
+* One entry is created for each node having:
+* - the 'identical implementation' flag
+* - its parent not having the flag
+*/
+   hetero_id = find_acpi_cpu_topology_hetero_id(cpu);
+   entry = xa_load(&hetero_cpu, hetero_id);
+   if (!entry) {
+   entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+   WARN_ON_ONCE(!entry);
+
+   if (entry) {
+   entry->core_id = topology_id;
+   entry->thread_num = 1;
+   xa_store(&hetero_cpu, hetero_id,
+entry, GFP_KERNEL);
+   }
+   } else if (entry->core_id == topology_id) {
+   entry->thread_num++;
+   }
} else {
cpu_topology[cpu].thread_id  = -1;
cpu_topology[cpu].core_id= topology_id;
@@ -67,6 +108,19 @@ int __init parse_acpi_topology(void)
cpu_topology[cpu].package_id = topology_id;
}
 
+   /*
+* This is a short loop since the number of XArray elements is the
+* number of heterogeneous CPU clusters. On a homogeneous system
+* there's only one entry in the XArray.
+*/
+   xa_for_each(&hetero_cpu, hetero_id, entry) {
+   ma

[PATCH v12 2/4] arch_topology: Support SMT control for OF based system

2025-03-11 Thread Yicong Yang

From: Yicong Yang 

On building the topology from the devicetree, we've already gotten the
SMT thread number of each core. Update the largest SMT thread number
and enable the SMT control by the end of topology parsing.

The framework's SMT control provides two interfaces to the users [1]
through /sys/devices/system/cpu/smt/control:
1) enable SMT by writing "on" and disable by "off"
2) enable SMT by writing max_thread_number or disable by writing 1

Both methods support completely disabling/enabling the SMT cores so both
work correctly for symmetric SMT platform and asymmetric platform with
non-SMT and one type SMT cores like:
core A: 1 thread
core B: X (X!=1) threads

Note that for a theoretically possible multiple SMT-X (X>1) core
platform the SMT control is also supported as expected but only
by writing the "on/off" method.

[1] 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/ABI/testing/sysfs-devices-system-cpu#n542
Reviewed-by: Pierre Gondois 
Reviewed-by: Dietmar Eggemann 
Signed-off-by: Yicong Yang 
---
 drivers/base/arch_topology.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 3ebe77566788..d409d323ee64 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -506,6 +507,10 @@ core_initcall(free_raw_capacity);
 #endif
 
 #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
+
+/* Used to enable the SMT control */
+static unsigned int max_smt_thread_num = 1;
+
 /*
  * This function returns the logic cpu number of the node.
  * There are basically three kinds of return values:
@@ -565,6 +570,8 @@ static int __init parse_core(struct device_node *core, int 
package_id,
i++;
} while (1);
 
+   max_smt_thread_num = max_t(unsigned int, max_smt_thread_num, i);
+
cpu = get_cpu_for_node(core);
if (cpu >= 0) {
if (!leaf) {
@@ -677,6 +684,17 @@ static int __init parse_socket(struct device_node *socket)
if (!has_socket)
ret = parse_cluster(socket, 0, -1, 0);
 
+   /*
+* Reset the max_smt_thread_num to 1 on failure. Since on failure
+* we need to notify the framework the SMT is not supported, but
+* max_smt_thread_num can be initialized to the SMT thread number
+* of the cores which are successfully parsed.
+*/
+   if (ret)
+   max_smt_thread_num = 1;
+
+   cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num);
+
return ret;
 }
 
-- 
2.24.0

[PATCH v12 0/4] Support SMT control on arm64

2025-03-11 Thread Yicong Yang

From: Yicong Yang 

The core CPU control framework supports runtime SMT control which
is not yet supported on arm64. Besides the general vulnerability
concerns we want this runtime control on our arm64 server for:

- better single CPU performance in some cases
- saving overall power consumption

This patchset implements it in the following aspects:

- Provides a default topology_is_primary_thread()
- support retrieve SMT thread number on OF based system
- support retrieve SMT thread number on ACPI based system
- select HOTPLUG_SMT for arm64

Tests have been done on our ACPI based arm64 server and on ACPI/OF
based QEMU VMs.

Change since v11:
- Remove the check and warning for heterogeneous platform as suggested and 
discussed
- refine comments/commit according to Dietmar
Link: 
https://lore.kernel.org/linux-arm-kernel/20250218141018.18082-1-yangyic...@huawei.com/

Change since v10:
- handle topology parsing failure case on DT based system
- address some style comments per Jonathan and add tags, Thanks
Link: 
https://lore.kernel.org/linux-arm-kernel/20241220075313.51502-1-yangyic...@huawei.com/

Change since v9:
- Refine the comment of topology_is_primary_thread(). Tested with LoongArch
  to prove it also works on architectures not using 
CONFIG_GENERIC_ARCH_TOPOLOGY
- always call cpu_smt_set_num_threads() to make the smt/control shows correct
  status on non-SMT system
Link: 
https://lore.kernel.org/linux-arm-kernel/20241114141127.23232-1-yangyic...@huawei.com/

Change since v8:
- Fix WARN on ACPI based non-SMT platform noticed in v7, per Pierre.
Link: https://lore.kernel.org/all/20241105093237.63565-1-yangyic...@huawei.com/

Change since v7:
Address the comments from Thomas:
- Add a newline between the glue define and function of 
topology_is_primary_thread
- Explicitly mention the sibling mask won't be empty in the comment
Link: https://lore.kernel.org/lkml/20241030125415.18994-1-yangyic...@huawei.com/

Change since v6:
- Fix unused variable if !CONFIG_ARM64 || !CONFIG_RISCV found by lkp-test
- Fix max_smt_thread_num updating in OF path pointed by Pierre
- Drop unused variable and refine the comments/commit per Pierre
Link: 
https://lore.kernel.org/linux-arm-kernel/20241015021841.35713-1-yangyic...@huawei.com/

Change since v5:
- Drop the dependency on CONFIG_SMP since it's always on arm64, per Pierre
- Avoid potential multiple calls of cpu_smt_set_num_threads() on asymmetric 
system, per Dietmar
- Detect heterogeneous SMT topology and issue a warning for partly support, per 
Pierre
- Thanks Dietmar for testing, didn't pickup the tag due to code changes. Thanks 
testing by Pierre
Link: 
https://lore.kernel.org/linux-arm-kernel/20240806085320.63514-1-yangyic...@huawei.com/

Change since v4:
- Provide a default topology_is_primary_thread() in the framework, Per Will
Link: 
https://lore.kernel.org/linux-arm-kernel/20231121092602.47792-1-yangyic...@huawei.com/

Change since v3:
- Fix some build and kconfig error reported by kernel test robot 

Link: 
https://lore.kernel.org/linux-arm-kernel/20231114040110.54590-1-yangyic...@huawei.com/

Change since v2:
- Detect SMT thread number at topology build from ACPI/DT, avoid looping CPUs
- Split patches into ACPI/OF/arch_topology path and enable the kconfig for arm64
Link: 
https://lore.kernel.org/linux-arm-kernel/20231010115335.13862-1-yangyic...@huawei.com/

Yicong Yang (4):
  cpu/SMT: Provide a default topology_is_primary_thread()
  arch_topology: Support SMT control for OF based system
  arm64: topology: Support SMT control on ACPI based system
  arm64: Kconfig: Enable HOTPLUG_SMT

 arch/arm64/Kconfig  |  1 +
 arch/arm64/kernel/topology.c| 54 +
 arch/powerpc/include/asm/topology.h |  1 +
 arch/x86/include/asm/topology.h |  2 +-
 drivers/base/arch_topology.c| 18 ++
 include/linux/topology.h| 24 +
 6 files changed, 99 insertions(+), 1 deletion(-)

-- 
2.24.0

[PATCH v12 4/4] arm64: Kconfig: Enable HOTPLUG_SMT

2025-03-11 Thread Yicong Yang

From: Yicong Yang 

Enable HOTPLUG_SMT for SMT control.

Reviewed-by: Jonathan Cameron 
Reviewed-by: Pierre Gondois 
Reviewed-by: Dietmar Eggemann 
Signed-off-by: Yicong Yang 
---
 arch/arm64/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 940343beb3d4..65fe00b1922c 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -250,6 +250,7 @@ config ARM64
select HAVE_KRETPROBES
select HAVE_GENERIC_VDSO
select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
+   select HOTPLUG_SMT if HOTPLUG_CPU
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
select KASAN_VMALLOC if KASAN
-- 
2.24.0

Re: [PATCH 05/13] MIPS: make setup_zero_pages() use memblock

2025-03-11 Thread Mike Rapoport

On Thu, Mar 06, 2025 at 08:51:15PM +0200, Mike Rapoport wrote:
> From: "Mike Rapoport (Microsoft)" 
> 
> Allocating the zero pages from memblock is simpler because the memory is
> already reserved.
> 
> This will also help with pulling out memblock_free_all() to the generic
> code and reducing code duplication in arch::mem_init().
> 
> Signed-off-by: Mike Rapoport (Microsoft) 
> ---
>  arch/mips/include/asm/mmzone.h |  2 --
>  arch/mips/mm/init.c| 16 +---
>  2 files changed, 5 insertions(+), 13 deletions(-)
 
Andrew can you please pick this as a fixup?

>From 148713d17cbdf7a3ad08f18ba203185b70c0e7c2 Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (Microsoft)" 
Date: Tue, 11 Mar 2025 07:51:27 +0200
Subject: [PATCH] MIPS: use memblock_alloc_or_panic() in setup_zero_page()

Signed-off-by: Mike Rapoport (Microsoft) 
---
 arch/mips/mm/init.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 6ea27bbd387e..a673d3d68254 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -68,9 +68,7 @@ static void __init setup_zero_pages(void)
else
order = 0;
 
-   empty_zero_page = (unsigned long)memblock_alloc(PAGE_SIZE << order, 
PAGE_SIZE);
-   if (!empty_zero_page)
-   panic("Oh boy, that early out of memory?");
+   empty_zero_page = (unsigned long)memblock_alloc_or_panic(PAGE_SIZE << 
order, PAGE_SIZE);
 
zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
 }
-- 
2.47.2

-- 
Sincerely yours,
Mike.

Re: [PATCH v7 7/7] powerpc/pseries: Add a char driver for physical-attestation RTAS

2025-03-11 Thread kernel test robot

Hi Haren,

kernel test robot noticed the following build warnings:

[auto build test WARNING on powerpc/next]
[also build test WARNING on powerpc/fixes linus/master v6.14-rc6]
[cannot apply to next-20250307]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:
https://github.com/intel-lab-lkp/linux/commits/Haren-Myneni/powerpc-pseries-Define-common-functions-for-RTAS-sequence-calls/20250310-054319
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
patch link:
https://lore.kernel.org/r/20250309213916.762116-8-haren%40linux.ibm.com
patch subject: [PATCH v7 7/7] powerpc/pseries: Add a char driver for 
physical-attestation RTAS
config: powerpc64-randconfig-r072-20250311 
(https://download.01.org/0day-ci/archive/20250311/202503111945.ijp4nbkl-...@intel.com/config)
compiler: clang version 21.0.0git (https://github.com/llvm/llvm-project 
e15545cad8297ec7555f26e5ae74a9f0511203e7)
reproduce (this is a W=1 build): 
(https://download.01.org/0day-ci/archive/20250311/202503111945.ijp4nbkl-...@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot 
| Closes: 
https://lore.kernel.org/oe-kbuild-all/202503111945.ijp4nbkl-...@intel.com/

All warnings (new ones prefixed by >>):

>> arch/powerpc/platforms/pseries/papr-phy-attest.c:137: warning: Function 
>> parameter or struct member 'seq' not described in 'phy_attest_sequence_begin'


vim +137 arch/powerpc/platforms/pseries/papr-phy-attest.c

   121  
   122  /*
   123   * Internal physical-attestation sequence APIs. A physical-attestation
   124   * sequence is a series of calls to get ibm,physical-attestation
   125   * for a given attestation command. The sequence ends when an error
   126   * is encountered or all data for the attestation command has been
   127   * returned.
   128   */
   129  
   130  /**
   131   * phy_attest_sequence_begin() - Begin a response data for attestation
   132   * command retrieval sequence.
   133   *
   134   * Context: May sleep.
   135   */
   136  static void phy_attest_sequence_begin(struct papr_rtas_sequence *seq)
 > 137  {
   138  struct rtas_phy_attest_params *param;
   139  
   140  /*
   141   * We could allocate the work area before acquiring the
   142   * function lock, but that would allow concurrent requests to
   143   * exhaust the limited work area pool for no benefit. So
   144   * allocate the work area under the lock.
   145   */
   146  mutex_lock(&rtas_ibm_physical_attestation_lock);
   147  param =  (struct rtas_phy_attest_params *)seq->params;
   148  param->work_area = rtas_work_area_alloc(SZ_4K);
   149  memcpy(rtas_work_area_raw_buf(param->work_area), &param->cmd,
   150  param->cmd_len);
   151  param->sequence = 1;
   152  param->status = 0;
   153  }
   154  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

[PATCH] selftest/powerpc/mm/pkey: fix build-break introduced by commit 00894c3fc917

2025-03-11 Thread Madhavan Srinivasan

Build break was reported in the powerpc mailing list for next-20250218 with 
below errors

make[1]: Nothing to be done for 'all'.
BUILD_TARGET=/root/venkat/linux-next/tools/testing/selftests/powerpc/mm; mkdir 
-p $BUILD_TARGET; make OUTPUT=$BUILD_TARGET -k -C mm all
  CC   pkey_exec_prot
In file included from pkey_exec_prot.c:18:
/root/venkat/linux-next/tools/testing/selftests/powerpc/include/pkeys.h: In 
function ‘pkeys_unsupported’:
/root/venkat/linux-next/tools/testing/selftests/powerpc/include/pkeys.h:96:34: 
error: ‘PKEY_UNRESTRICTED’ undeclared (first use in this function)
   96 | pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
  |  ^

https://lore.kernel.org/all/20250113170619.484698-2-yury.khrusta...@arm.com/ 
patchset
has been queued to arm64/for-next/pkey_unrestricted which is causing a build 
break
in the selftest/powerpc builds.

Commit 6d61527d931ba ("mm/pkey: Add PKEY_UNRESTRICTED macro") added a macro
PKEY_UNRESTRICTED to handle implicit literal value of 0x0 (which is 
"unrestricted").
Add the same to selftest/powerpc/pkeys.h to fix the reported build break.

Reported-by: Venkat Rao Bagalkote 
Closes: 
https://lore.kernel.org/lkml/3267ea6e-5a1a-4752-96ef-8351c912d...@linux.ibm.com/T/
Tested-by: Venkat Rao Bagalkote 
Signed-off-by: Madhavan Srinivasan 
---
Catalin, can you take this fix via arm64/for-next/pkey_unrestricted?
Patch applies clean on top of arm64/for-next/pkey_unrestricted

 tools/testing/selftests/powerpc/include/pkeys.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/testing/selftests/powerpc/include/pkeys.h 
b/tools/testing/selftests/powerpc/include/pkeys.h
index c6d4063dd4f6..d6deb6ffa1b9 100644
--- a/tools/testing/selftests/powerpc/include/pkeys.h
+++ b/tools/testing/selftests/powerpc/include/pkeys.h
@@ -24,6 +24,9 @@
 #undef PKEY_DISABLE_EXECUTE
 #define PKEY_DISABLE_EXECUTE   0x4
 
+#undef PKEY_UNRESTRICTED
+#define PKEY_UNRESTRICTED  0x0
+
 /* Older versions of libc do not define this */
 #ifndef SEGV_PKUERR
 #define SEGV_PKUERR4
-- 
2.48.1

Re: [PATCH v4 net-next 02/14] net: enetc: add command BD ring support for i.MX95 ENETC

2025-03-11 Thread Michal Kubiak

On Tue, Mar 11, 2025 at 01:38:18PM +0800, Wei Fang wrote:
> The command BD ring is used to configure functionality where the
> underlying resources may be shared between different entities or being
> too large to configure using direct registers (such as lookup tables).
> 
> Because the command BD and table formats of i.MX95 and LS1028A are very
> different, the software processing logic is also different. In order to
> ensure driver compatibility, struct enetc_si_ops is introduced. This
> structure defines some hooks shared by VSI and PSI. Different hardware
> drivers will register different hooks. For example, setup_cbdr() is used
> to initialize the command BD ring, and teardown_cbdr() is used to free
> the command BD ring.
> 
> Signed-off-by: Wei Fang 

Thanks,
Reviewed-by: Michal Kubiak

Re: [PATCH v4 6/6] powerpc/kvm-hv-pmu: Add perf-events for Hostwide counters

2025-03-11 Thread Vaibhav Jain

Athira Rajeev  writes:

>> On 24 Feb 2025, at 6:45 PM, Vaibhav Jain  wrote:
>> 
>> Update 'kvm-hv-pmu.c' to add five new perf-events mapped to the five
>> Hostwide counters. Since these newly introduced perf events are at system
>> wide scope and can be read from any L1-Lpar CPU, 'kvmppc_pmu' scope and
>> capabilities are updated appropriately.
>> 
>> Also introduce two new helpers. First is kvmppc_update_l0_stats() that uses
>> the infrastructure introduced in previous patches to issue the
>> H_GUEST_GET_STATE hcall to L0-PowerVM to fetch guest-state-buffer holding the
>> latest values of these counters which is then parsed and 'l0_stats'
>> variable updated.
>> 
>> Second helper is kvmppc_pmu_event_update() which is called from
>> 'kvmppc_pmu' callbacks and uses kvmppc_update_l0_stats() to update
>> 'l0_stats' and then update the 'struct perf_event's event-counter.
>> 
>> Some minor updates to kvmppc_pmu_{add, del, read}() to remove some debug
>> scaffolding code.
>> 
>> Signed-off-by: Vaibhav Jain 
>> ---
>> Changelog
>> 
>> v3->v4:
>> * Minor tweaks to patch description and code as its now being built as a
>> separate kernel module.
>> 
>> v2->v3:
>> None
>> 
>> v1->v2:
>> None
>> ---
>> arch/powerpc/perf/kvm-hv-pmu.c | 92 +-
>> 1 file changed, 91 insertions(+), 1 deletion(-)
>> 
>> diff --git a/arch/powerpc/perf/kvm-hv-pmu.c b/arch/powerpc/perf/kvm-hv-pmu.c
>> index ed371454f7b5..274459bb32d6 100644
>> --- a/arch/powerpc/perf/kvm-hv-pmu.c
>> +++ b/arch/powerpc/perf/kvm-hv-pmu.c
>> @@ -30,6 +30,11 @@
>> #include "asm/guest-state-buffer.h"
>> 
>> enum kvmppc_pmu_eventid {
>> + KVMPPC_EVENT_HOST_HEAP,
>> + KVMPPC_EVENT_HOST_HEAP_MAX,
>> + KVMPPC_EVENT_HOST_PGTABLE,
>> + KVMPPC_EVENT_HOST_PGTABLE_MAX,
>> + KVMPPC_EVENT_HOST_PGTABLE_RECLAIM,
>> KVMPPC_EVENT_MAX,
>> };
>> 
>> @@ -61,8 +66,14 @@ static DEFINE_SPINLOCK(lock_l0_stats);
>> /* GSB related structs needed to talk to L0 */
>> static struct kvmppc_gs_msg *gsm_l0_stats;
>> static struct kvmppc_gs_buff *gsb_l0_stats;
>> +static struct kvmppc_gs_parser gsp_l0_stats;
>> 
>> static struct attribute *kvmppc_pmu_events_attr[] = {
>> + KVMPPC_PMU_EVENT_ATTR(host_heap, KVMPPC_EVENT_HOST_HEAP),
>> + KVMPPC_PMU_EVENT_ATTR(host_heap_max, KVMPPC_EVENT_HOST_HEAP_MAX),
>> + KVMPPC_PMU_EVENT_ATTR(host_pagetable, KVMPPC_EVENT_HOST_PGTABLE),
>> + KVMPPC_PMU_EVENT_ATTR(host_pagetable_max, KVMPPC_EVENT_HOST_PGTABLE_MAX),
>> + KVMPPC_PMU_EVENT_ATTR(host_pagetable_reclaim, 
>> KVMPPC_EVENT_HOST_PGTABLE_RECLAIM),
>> NULL,
>> };
>> 
>> @@ -71,7 +82,7 @@ static const struct attribute_group 
>> kvmppc_pmu_events_group = {
>> .attrs = kvmppc_pmu_events_attr,
>> };
>> 
>> -PMU_FORMAT_ATTR(event, "config:0");
>> +PMU_FORMAT_ATTR(event, "config:0-5");
>> static struct attribute *kvmppc_pmu_format_attr[] = {
>> &format_attr_event.attr,
>> NULL,
>> @@ -88,6 +99,79 @@ static const struct attribute_group 
>> *kvmppc_pmu_attr_groups[] = {
>> NULL,
>> };
>> 
>> +/*
>> + * Issue the hcall to get the L0-host stats.
>> + * Should be called with l0-stat lock held
>> + */
>> +static int kvmppc_update_l0_stats(void)
>> +{
>> + int rc;
>> +
>> + /* With HOST_WIDE flags guestid and vcpuid will be ignored */
>> + rc = kvmppc_gsb_recv(gsb_l0_stats, KVMPPC_GS_FLAGS_HOST_WIDE);
>> + if (rc)
>> + goto out;
>> +
>> + /* Parse the guest state buffer is successful */
>> + rc = kvmppc_gse_parse(&gsp_l0_stats, gsb_l0_stats);
>> + if (rc)
>> + goto out;
>> +
>> + /* Update the l0 returned stats*/
>> + memset(&l0_stats, 0, sizeof(l0_stats));
>> + rc = kvmppc_gsm_refresh_info(gsm_l0_stats, gsb_l0_stats);
>> +
>> +out:
>> + return rc;
>> +}
>> +
>> +/* Update the value of the given perf_event */
>> +static int kvmppc_pmu_event_update(struct perf_event *event)
>> +{
>> + int rc;
>> + u64 curr_val, prev_val;
>> + unsigned long flags;
>> + unsigned int config = event->attr.config;
>> +
>> + /* Ensure no one else is modifying the l0_stats */
>> + spin_lock_irqsave(&lock_l0_stats, flags);
>> +
>> + rc = kvmppc_update_l0_stats();
>> + if (!rc) {
>> + switch (config) {
>> + case KVMPPC_EVENT_HOST_HEAP:
>> + curr_val = l0_stats.guest_heap;
>> + break;
>> + case KVMPPC_EVENT_HOST_HEAP_MAX:
>> + curr_val = l0_stats.guest_heap_max;
>> + break;
>> + case KVMPPC_EVENT_HOST_PGTABLE:
>> + curr_val = l0_stats.guest_pgtable_size;
>> + break;
>> + case KVMPPC_EVENT_HOST_PGTABLE_MAX:
>> + curr_val = l0_stats.guest_pgtable_size_max;
>> + break;
>> + case KVMPPC_EVENT_HOST_PGTABLE_RECLAIM:
>> + curr_val = l0_stats.guest_pgtable_reclaim;
>> + break;
>> + default:
>> + rc = -ENOENT;
>> + break;
>> + }
>> + }
>> +
>> + spin_unlock_irqrestore(&lock_l0_stats, flags);
>> +
>> + /* If no error than update the perf event */
>> + if (!rc) {
>> + prev_val = local64_xchg(&event->hw.prev_count, curr_val);
>> + if (curr_val > prev_val)
>> + local64_add(curr_val - prev_val, &event->count);
>> + }
>> +
>> + return rc;
>> +}
>> +
>> static int kvmppc_pmu_event_init(struct perf_event *event)
>> {
>> unsigned int co

Re: [linux-next-20250307] Build Failure

2025-03-11 Thread Venkat Rao Bagalkote

Git Bisect is pointing to commit: 
3e081aa132bbefe31ac95dd6dfc8d787ffa83d0b as first bad commit.



Git bisect log:

git bisect start
# status: waiting for both good and bad commits
# bad: [0a2f889128969dab41861b6e40111aa03dc57014] Add linux-next 
specific files for 20250307

git bisect bad 0a2f889128969dab41861b6e40111aa03dc57014
# status: waiting for good commit(s), bad commit known
# good: [7eb172143d5508b4da468ed59ee857c6e5e01da6] Linux 6.14-rc5
git bisect good 7eb172143d5508b4da468ed59ee857c6e5e01da6
# good: [7eb172143d5508b4da468ed59ee857c6e5e01da6] Linux 6.14-rc5
git bisect good 7eb172143d5508b4da468ed59ee857c6e5e01da6
# good: [80ec13b98c6378cbf9b29d7ee7d3db930ddbd858] Merge branch 'master' 
of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git

git bisect good 80ec13b98c6378cbf9b29d7ee7d3db930ddbd858
# good: [6c60220c45270869a7c5f791f6e0197b1f0d0388] Merge branch 
'driver-core-next' of 
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git

git bisect good 6c60220c45270869a7c5f791f6e0197b1f0d0388
# good: [187734f508b0a9a00ccaaf7d8ba05874b624ac73] Merge branch 
'for-next' of 
git://git.kernel.org/pub/scm/linux/kernel/git/remoteproc/linux.git

git bisect good 187734f508b0a9a00ccaaf7d8ba05874b624ac73
# good: [316ff3a28679b82eb2bf17c02dbca970e7433182] Merge branch 
'for-next/seccomp' of 
git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git

git bisect good 316ff3a28679b82eb2bf17c02dbca970e7433182
# bad: [69759c9d8dd7df716dcca3601b82e5618332cef7] Merge branch 
'rust-next' of https://github.com/Rust-for-Linux/linux.git

git bisect bad 69759c9d8dd7df716dcca3601b82e5618332cef7
# bad: [1e4eee5176c91b00e73cee90712a995668020a9c] Merge branch 
'mhi-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mani/mhi.git

git bisect bad 1e4eee5176c91b00e73cee90712a995668020a9c
# bad: [962bc2aae4f4295314d4a5f5c59a465f97f8b59a] Merge branch 
'for-next' of 
git://git.kernel.org/pub/scm/linux/kernel/git/andy/linux-auxdisplay.git

git bisect bad 962bc2aae4f4295314d4a5f5c59a465f97f8b59a
# bad: [612fd001319aae2b514fc234939806cf3294dbba] Merge branch 'main' of 
git://git.infradead.org/users/willy/xarray.git

git bisect bad 612fd001319aae2b514fc234939806cf3294dbba
# bad: [f5175dd69428ab517c8d68e772c4d287b6570d8e] dt-bindings: nvmem: 
fixed-cell: increase bits start value to 31

git bisect bad f5175dd69428ab517c8d68e772c4d287b6570d8e
# bad: [8c94337ebbfb840944574f82df0cbe35930d8df8] dt-bindings: nvmem: 
rockchip,otp: Add compatible for RK3576

git bisect bad 8c94337ebbfb840944574f82df0cbe35930d8df8
# bad: [024e21343f3cbcde0343473fcaf094d2c19cc7bf] nvmem: rockchip-otp: 
Move read-offset into variant-data

git bisect bad 024e21343f3cbcde0343473fcaf094d2c19cc7bf
# bad: [3e081aa132bbefe31ac95dd6dfc8d787ffa83d0b] clk: rockchip: rk3576: 
define clk_otp_phy_g

git bisect bad 3e081aa132bbefe31ac95dd6dfc8d787ffa83d0b
# first bad commit: [3e081aa132bbefe31ac95dd6dfc8d787ffa83d0b] clk: 
rockchip: rk3576: define clk_otp_phy_g


On 09/03/25 6:08 pm, Venkat Rao Bagalkote wrote:

Greetings!!,

I see linux-next-20250307 fails to build on IBM Power9 and Power10 
servers.



Errors:

In file included from ^[[01m^[[K^[[m^[[K:
^[[01m^[[K./usr/include/cxl/features.h:11:10:^[[m^[[K 
^[[01;31m^[[Kfatal error: ^[[m^[[Kuuid/uuid.h: No such file or directory

   11 | #include ^[[01;31m^[[K^[[m^[[K
  |  ^[[01;31m^[[K^^[[m^[[K
compilation terminated.
make[4]: *** [usr/include/Makefile:85: 
usr/include/cxl/features.hdrtest] Error 1

make[3]: *** [scripts/Makefile.build:461: usr/include] Error 2
make[2]: *** [scripts/Makefile.build:461: usr] Error 2
make[2]: *** Waiting for unfinished jobs
arch/powerpc/kernel/switch.o: warning: objtool: .text+0x4: 
intra_function_call not a direct call
arch/powerpc/crypto/ghashp8-ppc.o: warning: objtool: .text+0x22c: 
unannotated intra-function call
arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: 
.text+0xe84: intra_function_call not a direct call

make[1]: *** [/home/linux_src/linux/Makefile:1997: .] Error 2
make: *** [Makefile:251: __sub-make] Error 2

Please add below tag, if you happen to fix this issue.

Reported-by: Venkat Rao Bagalkote 


Regards,

Venkat.

[PATCH v4 net-next 09/14] net: enetc: enable RSS feature by default

2025-03-11 Thread Wei Fang

Receive side scaling (RSS) is a network driver technology that enables
the efficient distribution of network receive processing across multiple
CPUs in multiprocessor systems. Therefore, it is better to enable RSS by
default so that the CPU load can be balanced and network performance can
be improved when the network is enabled.

Signed-off-by: Wei Fang 
---
 drivers/net/ethernet/freescale/enetc/enetc.c  | 35 ++-
 .../freescale/enetc/enetc_pf_common.c |  4 ++-
 .../net/ethernet/freescale/enetc/enetc_vf.c   |  4 ++-
 3 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c 
b/drivers/net/ethernet/freescale/enetc/enetc.c
index 5b5e65ac8fab..8583ac9f7b9e 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -2420,6 +2420,22 @@ static void enetc_set_lso_flags_mask(struct enetc_hw *hw)
enetc_wr(hw, ENETC4_SILSOSFMR1, 0);
 }
 
+static int enetc_set_rss(struct net_device *ndev, int en)
+{
+   struct enetc_ndev_priv *priv = netdev_priv(ndev);
+   struct enetc_hw *hw = &priv->si->hw;
+   u32 reg;
+
+   enetc_wr(hw, ENETC_SIRBGCR, priv->num_rx_rings);
+
+   reg = enetc_rd(hw, ENETC_SIMR);
+   reg &= ~ENETC_SIMR_RSSE;
+   reg |= (en) ? ENETC_SIMR_RSSE : 0;
+   enetc_wr(hw, ENETC_SIMR, reg);
+
+   return 0;
+}
+
 int enetc_configure_si(struct enetc_ndev_priv *priv)
 {
struct enetc_si *si = priv->si;
@@ -2440,6 +2456,9 @@ int enetc_configure_si(struct enetc_ndev_priv *priv)
err = enetc_setup_default_rss_table(si, priv->num_rx_rings);
if (err)
return err;
+
+   if (priv->ndev->features & NETIF_F_RXHASH)
+   enetc_set_rss(priv->ndev, true);
}
 
return 0;
@@ -3232,22 +3251,6 @@ struct net_device_stats *enetc_get_stats(struct 
net_device *ndev)
 }
 EXPORT_SYMBOL_GPL(enetc_get_stats);
 
-static int enetc_set_rss(struct net_device *ndev, int en)
-{
-   struct enetc_ndev_priv *priv = netdev_priv(ndev);
-   struct enetc_hw *hw = &priv->si->hw;
-   u32 reg;
-
-   enetc_wr(hw, ENETC_SIRBGCR, priv->num_rx_rings);
-
-   reg = enetc_rd(hw, ENETC_SIMR);
-   reg &= ~ENETC_SIMR_RSSE;
-   reg |= (en) ? ENETC_SIMR_RSSE : 0;
-   enetc_wr(hw, ENETC_SIMR, reg);
-
-   return 0;
-}
-
 static void enetc_enable_rxvlan(struct net_device *ndev, bool en)
 {
struct enetc_ndev_priv *priv = netdev_priv(ndev);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c 
b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c
index c346e0e3ad37..a737a7f8c79e 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c
@@ -128,8 +128,10 @@ void enetc_pf_netdev_setup(struct enetc_si *si, struct 
net_device *ndev,
if (si->hw_features & ENETC_SI_F_LSO)
priv->active_offloads |= ENETC_F_LSO;
 
-   if (si->num_rss)
+   if (si->num_rss) {
ndev->hw_features |= NETIF_F_RXHASH;
+   ndev->features |= NETIF_F_RXHASH;
+   }
 
/* TODO: currently, i.MX95 ENETC driver does not support advanced 
features */
if (!is_enetc_rev1(si)) {
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c 
b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
index 072e5b40a199..3372a9a779a6 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
@@ -155,8 +155,10 @@ static void enetc_vf_netdev_setup(struct enetc_si *si, 
struct net_device *ndev,
ndev->vlan_features = NETIF_F_SG | NETIF_F_HW_CSUM |
  NETIF_F_TSO | NETIF_F_TSO6;
 
-   if (si->num_rss)
+   if (si->num_rss) {
ndev->hw_features |= NETIF_F_RXHASH;
+   ndev->features |= NETIF_F_RXHASH;
+   }
 
/* pick up primary MAC address from SI */
enetc_load_primary_mac_addr(&si->hw, ndev);
-- 
2.34.1

Re: [linux-next-20250307] Build Failure

2025-03-11 Thread Nicolas Frattaroli

On Tuesday, 11 March 2025 05:23:23 Central European Standard Time Venkat Rao 
Bagalkote wrote:
> Git Bisect is pointing to commit:
> 3e081aa132bbefe31ac95dd6dfc8d787ffa83d0b as first bad commit.

That does not seem like a correct bisection to me. The build error isn't even 
in the same translation unit as the change the commit you landed on touches, 
much less any subsystem even remotely related to it.

> 
> Git bisect log:
> 
> git bisect start
> # status: waiting for both good and bad commits
> # bad: [0a2f889128969dab41861b6e40111aa03dc57014] Add linux-next
> specific files for 20250307
> git bisect bad 0a2f889128969dab41861b6e40111aa03dc57014
> # status: waiting for good commit(s), bad commit known
> # good: [7eb172143d5508b4da468ed59ee857c6e5e01da6] Linux 6.14-rc5
> git bisect good 7eb172143d5508b4da468ed59ee857c6e5e01da6
> # good: [7eb172143d5508b4da468ed59ee857c6e5e01da6] Linux 6.14-rc5
> git bisect good 7eb172143d5508b4da468ed59ee857c6e5e01da6
> # good: [80ec13b98c6378cbf9b29d7ee7d3db930ddbd858] Merge branch 'master'
> of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git
> git bisect good 80ec13b98c6378cbf9b29d7ee7d3db930ddbd858
> # good: [6c60220c45270869a7c5f791f6e0197b1f0d0388] Merge branch
> 'driver-core-next' of
> git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
> git bisect good 6c60220c45270869a7c5f791f6e0197b1f0d0388
> # good: [187734f508b0a9a00ccaaf7d8ba05874b624ac73] Merge branch
> 'for-next' of
> git://git.kernel.org/pub/scm/linux/kernel/git/remoteproc/linux.git
> git bisect good 187734f508b0a9a00ccaaf7d8ba05874b624ac73
> # good: [316ff3a28679b82eb2bf17c02dbca970e7433182] Merge branch
> 'for-next/seccomp' of
> git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git
> git bisect good 316ff3a28679b82eb2bf17c02dbca970e7433182
> # bad: [69759c9d8dd7df716dcca3601b82e5618332cef7] Merge branch
> 'rust-next' of https://github.com/Rust-for-Linux/linux.git
> git bisect bad 69759c9d8dd7df716dcca3601b82e5618332cef7
> # bad: [1e4eee5176c91b00e73cee90712a995668020a9c] Merge branch
> 'mhi-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mani/mhi.git
> git bisect bad 1e4eee5176c91b00e73cee90712a995668020a9c
> # bad: [962bc2aae4f4295314d4a5f5c59a465f97f8b59a] Merge branch
> 'for-next' of
> git://git.kernel.org/pub/scm/linux/kernel/git/andy/linux-auxdisplay.git
> git bisect bad 962bc2aae4f4295314d4a5f5c59a465f97f8b59a
> # bad: [612fd001319aae2b514fc234939806cf3294dbba] Merge branch 'main' of
> git://git.infradead.org/users/willy/xarray.git
> git bisect bad 612fd001319aae2b514fc234939806cf3294dbba
> # bad: [f5175dd69428ab517c8d68e772c4d287b6570d8e] dt-bindings: nvmem:
> fixed-cell: increase bits start value to 31
> git bisect bad f5175dd69428ab517c8d68e772c4d287b6570d8e
> # bad: [8c94337ebbfb840944574f82df0cbe35930d8df8] dt-bindings: nvmem:
> rockchip,otp: Add compatible for RK3576
> git bisect bad 8c94337ebbfb840944574f82df0cbe35930d8df8
> # bad: [024e21343f3cbcde0343473fcaf094d2c19cc7bf] nvmem: rockchip-otp:
> Move read-offset into variant-data
> git bisect bad 024e21343f3cbcde0343473fcaf094d2c19cc7bf
> # bad: [3e081aa132bbefe31ac95dd6dfc8d787ffa83d0b] clk: rockchip: rk3576:
> define clk_otp_phy_g
> git bisect bad 3e081aa132bbefe31ac95dd6dfc8d787ffa83d0b
> # first bad commit: [3e081aa132bbefe31ac95dd6dfc8d787ffa83d0b] clk:
> rockchip: rk3576: define clk_otp_phy_g
> 
> On 09/03/25 6:08 pm, Venkat Rao Bagalkote wrote:
> > Greetings!!,
> > 
> > I see linux-next-20250307 fails to build on IBM Power9 and Power10
> > servers.
> > 
> > 
> > Errors:
> > 
> > In file included from ^[[01m^[[K^[[m^[[K:

"In file included from " seems a little suspicious.

> > ^[[01m^[[K./usr/include/cxl/features.h:11:10:^[[m^[[K
> > ^[[01;31m^[[Kfatal error: ^[[m^[[Kuuid/uuid.h: No such file or directory
> >11 | #include ^[[01;31m^[[K^[[m^[[K
> >   |  ^[[01;31m^[[K^^[[m^[[K
> > compilation terminated.
> > make[4]: *** [usr/include/Makefile:85:
> > usr/include/cxl/features.hdrtest] Error 1
> > make[3]: *** [scripts/Makefile.build:461: usr/include] Error 2
> > make[2]: *** [scripts/Makefile.build:461: usr] Error 2
> > make[2]: *** Waiting for unfinished jobs
> > arch/powerpc/kernel/switch.o: warning: objtool: .text+0x4:
> > intra_function_call not a direct call
> > arch/powerpc/crypto/ghashp8-ppc.o: warning: objtool: .text+0x22c:
> > unannotated intra-function call
> > arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool:
> > .text+0xe84: intra_function_call not a direct call
> > make[1]: *** [/home/linux_src/linux/Makefile:1997: .] Error 2
> > make: *** [Makefile:251: __sub-make] Error 2
> > 
> > Please add below tag, if you happen to fix this issue.
> > 
> > Reported-by: Venkat Rao Bagalkote 
> > 
> > 
> > Regards,
> > 
> > Venkat.

Regards,
Nicolas Frattaroli

Re: [PATCH v4 6/6] powerpc/kvm-hv-pmu: Add perf-events for Hostwide counters

2025-03-11 Thread Athira Rajeev




> On 11 Mar 2025, at 3:02 PM, Vaibhav Jain  wrote:
> 
> Athira Rajeev  writes:
> 
>>> On 24 Feb 2025, at 6:45 PM, Vaibhav Jain  wrote:
>>> 
>>> Update 'kvm-hv-pmu.c' to add five new perf-events mapped to the five
>>> Hostwide counters. Since these newly introduced perf events are at system
>>> wide scope and can be read from any L1-Lpar CPU, 'kvmppc_pmu' scope and
>>> capabilities are updated appropriately.
>>> 
>>> Also introduce two new helpers. First is kvmppc_update_l0_stats() that uses
>>> the infrastructure introduced in previous patches to issue the
>>> H_GUEST_GET_STATE hcall to L0-PowerVM to fetch guest-state-buffer holding the
>>> latest values of these counters which is then parsed and 'l0_stats'
>>> variable updated.
>>> 
>>> Second helper is kvmppc_pmu_event_update() which is called from
>>> 'kvmppc_pmu' callbacks and uses kvmppc_update_l0_stats() to update
>>> 'l0_stats' and then update the 'struct perf_event's event-counter.
>>> 
>>> Some minor updates to kvmppc_pmu_{add, del, read}() to remove some debug
>>> scaffolding code.
>>> 
>>> Signed-off-by: Vaibhav Jain 
>>> ---
>>> Changelog
>>> 
>>> v3->v4:
>>> * Minor tweaks to patch description and code as its now being built as a
>>> separate kernel module.
>>> 
>>> v2->v3:
>>> None
>>> 
>>> v1->v2:
>>> None
>>> ---
>>> arch/powerpc/perf/kvm-hv-pmu.c | 92 +-
>>> 1 file changed, 91 insertions(+), 1 deletion(-)
>>> 
>>> diff --git a/arch/powerpc/perf/kvm-hv-pmu.c b/arch/powerpc/perf/kvm-hv-pmu.c
>>> index ed371454f7b5..274459bb32d6 100644
>>> --- a/arch/powerpc/perf/kvm-hv-pmu.c
>>> +++ b/arch/powerpc/perf/kvm-hv-pmu.c
>>> @@ -30,6 +30,11 @@
>>> #include "asm/guest-state-buffer.h"
>>> 
>>> enum kvmppc_pmu_eventid {
>>> + KVMPPC_EVENT_HOST_HEAP,
>>> + KVMPPC_EVENT_HOST_HEAP_MAX,
>>> + KVMPPC_EVENT_HOST_PGTABLE,
>>> + KVMPPC_EVENT_HOST_PGTABLE_MAX,
>>> + KVMPPC_EVENT_HOST_PGTABLE_RECLAIM,
>>> KVMPPC_EVENT_MAX,
>>> };
>>> 
>>> @@ -61,8 +66,14 @@ static DEFINE_SPINLOCK(lock_l0_stats);
>>> /* GSB related structs needed to talk to L0 */
>>> static struct kvmppc_gs_msg *gsm_l0_stats;
>>> static struct kvmppc_gs_buff *gsb_l0_stats;
>>> +static struct kvmppc_gs_parser gsp_l0_stats;
>>> 
>>> static struct attribute *kvmppc_pmu_events_attr[] = {
>>> + KVMPPC_PMU_EVENT_ATTR(host_heap, KVMPPC_EVENT_HOST_HEAP),
>>> + KVMPPC_PMU_EVENT_ATTR(host_heap_max, KVMPPC_EVENT_HOST_HEAP_MAX),
>>> + KVMPPC_PMU_EVENT_ATTR(host_pagetable, KVMPPC_EVENT_HOST_PGTABLE),
>>> + KVMPPC_PMU_EVENT_ATTR(host_pagetable_max, KVMPPC_EVENT_HOST_PGTABLE_MAX),
>>> + KVMPPC_PMU_EVENT_ATTR(host_pagetable_reclaim, 
>>> KVMPPC_EVENT_HOST_PGTABLE_RECLAIM),
>>> NULL,
>>> };
>>> 
>>> @@ -71,7 +82,7 @@ static const struct attribute_group 
>>> kvmppc_pmu_events_group = {
>>> .attrs = kvmppc_pmu_events_attr,
>>> };
>>> 
>>> -PMU_FORMAT_ATTR(event, "config:0");
>>> +PMU_FORMAT_ATTR(event, "config:0-5");
>>> static struct attribute *kvmppc_pmu_format_attr[] = {
>>> &format_attr_event.attr,
>>> NULL,
>>> @@ -88,6 +99,79 @@ static const struct attribute_group 
>>> *kvmppc_pmu_attr_groups[] = {
>>> NULL,
>>> };
>>> 
>>> +/*
>>> + * Issue the hcall to get the L0-host stats.
>>> + * Should be called with l0-stat lock held
>>> + */
>>> +static int kvmppc_update_l0_stats(void)
>>> +{
>>> + int rc;
>>> +
>>> + /* With HOST_WIDE flags guestid and vcpuid will be ignored */
>>> + rc = kvmppc_gsb_recv(gsb_l0_stats, KVMPPC_GS_FLAGS_HOST_WIDE);
>>> + if (rc)
>>> + goto out;
>>> +
>>> + /* Parse the guest state buffer is successful */
>>> + rc = kvmppc_gse_parse(&gsp_l0_stats, gsb_l0_stats);
>>> + if (rc)
>>> + goto out;
>>> +
>>> + /* Update the l0 returned stats*/
>>> + memset(&l0_stats, 0, sizeof(l0_stats));
>>> + rc = kvmppc_gsm_refresh_info(gsm_l0_stats, gsb_l0_stats);
>>> +
>>> +out:
>>> + return rc;
>>> +}
>>> +
>>> +/* Update the value of the given perf_event */
>>> +static int kvmppc_pmu_event_update(struct perf_event *event)
>>> +{
>>> + int rc;
>>> + u64 curr_val, prev_val;
>>> + unsigned long flags;
>>> + unsigned int config = event->attr.config;
>>> +
>>> + /* Ensure no one else is modifying the l0_stats */
>>> + spin_lock_irqsave(&lock_l0_stats, flags);
>>> +
>>> + rc = kvmppc_update_l0_stats();
>>> + if (!rc) {
>>> + switch (config) {
>>> + case KVMPPC_EVENT_HOST_HEAP:
>>> + curr_val = l0_stats.guest_heap;
>>> + break;
>>> + case KVMPPC_EVENT_HOST_HEAP_MAX:
>>> + curr_val = l0_stats.guest_heap_max;
>>> + break;
>>> + case KVMPPC_EVENT_HOST_PGTABLE:
>>> + curr_val = l0_stats.guest_pgtable_size;
>>> + break;
>>> + case KVMPPC_EVENT_HOST_PGTABLE_MAX:
>>> + curr_val = l0_stats.guest_pgtable_size_max;
>>> + break;
>>> + case KVMPPC_EVENT_HOST_PGTABLE_RECLAIM:
>>> + curr_val = l0_stats.guest_pgtable_reclaim;
>>> + break;
>>> + default:
>>> + rc = -ENOENT;
>>> + break;
>>> + }
>>> + }
>>> +
>>> + spin_unlock_irqrestore(&lock_l0_stats, flags);
>>> +
>>> + /* If no error than update the perf event */
>>> + if (!rc) {
>>> + prev_val = local64_xchg(&event->hw.prev_count, curr_val

[PATCH 2/2] powerpc, bpf: Inline bpf_get_smp_processor_id()

2025-03-11 Thread Saket Kumar Bhaskar

Inline the calls to bpf_get_smp_processor_id() in the powerpc bpf jit.

powerpc saves the Logical processor number (paca_index) in paca.

Here is how the powerpc JITed assembly changes after this commit:

Before:

cpu = bpf_get_smp_processor_id();

addis 12, 2, -517
addi 12, 12, -29456
mtctr 12
bctrl
mr  8, 3

After:

cpu = bpf_get_smp_processor_id();

lhz 8, 8(13)

To evaluate the performance improvements introduced by this change,
the benchmark described in [1] was employed.

+---+---+---+--+
|  Name |  Before   |After  |   % change   |
|---+---+---+--|
| glob-arr-inc  | 41.580 ± 0.034M/s | 54.137 ± 0.019M/s |   + 30.20%   |
| arr-inc   | 39.592 ± 0.055M/s | 54.000 ± 0.026M/s |   + 36.39%   |
| hash-inc  | 25.873 ± 0.012M/s | 26.334 ± 0.058M/s |   + 1.78%|
+---+---+---+--+

[1] https://github.com/anakryiko/linux/commit/8dec900975ef

Signed-off-by: Saket Kumar Bhaskar 
---
 arch/powerpc/net/bpf_jit_comp.c   | 10 ++
 arch/powerpc/net/bpf_jit_comp64.c |  5 +
 2 files changed, 15 insertions(+)

diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 3d4bd45a9a22..4b79b2d95469 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -445,6 +445,16 @@ bool bpf_jit_supports_percpu_insn(void)
return true;
 }
 
+bool bpf_jit_inlines_helper_call(s32 imm)
+{
+   switch (imm) {
+   case BPF_FUNC_get_smp_processor_id:
+   return true;
+   default:
+   return false;
+   }
+}
+
 void *arch_alloc_bpf_trampoline(unsigned int size)
 {
return bpf_prog_pack_alloc(size, bpf_jit_fill_ill_insns);
diff --git a/arch/powerpc/net/bpf_jit_comp64.c 
b/arch/powerpc/net/bpf_jit_comp64.c
index 06f06770ceea..a8de12c026da 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -1087,6 +1087,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, 
u32 *fimage, struct code
case BPF_JMP | BPF_CALL:
ctx->seen |= SEEN_FUNC;
 
+   if (insn[i].src_reg == 0 && imm == 
BPF_FUNC_get_smp_processor_id) {
+   EMIT(PPC_RAW_LHZ(bpf_to_ppc(BPF_REG_0), _R13, 
offsetof(struct paca_struct, paca_index)));
+   break;
+   }
+
ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
&func_addr, 
&func_addr_fixed);
if (ret < 0)
-- 
2.43.5

[PATCH 1/2] powerpc, bpf: Support internal-only MOV instruction to resolve per-CPU addrs

2025-03-11 Thread Saket Kumar Bhaskar

With the introduction of commit 7bdbf7446305 ("bpf: add special
internal-only MOV instruction to resolve per-CPU addrs"),
a new BPF instruction BPF_MOV64_PERCPU_REG has been added to
resolve absolute addresses of per-CPU data from their per-CPU
offsets. This update requires enabling support for this
instruction in the powerpc JIT compiler.

As of commit 7a0268fa1a36 ("[PATCH] powerpc/64: per cpu data
optimisations"), the per-CPU data offset for the CPU is stored in
the paca.

To support this BPF instruction in the powerpc JIT, the following
powerpc instructions are emitted:

mr dst_reg, src_reg //Move src_reg to dst_reg, if src_reg != dst_reg
ld tmp1_reg, 48(13) //Load per-CPU data offset from paca(r13) in 
tmp1_reg.
add dst_reg, dst_reg, tmp1_reg  //Add the per cpu offset to the dst.

To evaluate the performance improvements introduced by this change,
the benchmark described in [1] was employed.

Before Change:
glob-arr-inc   :   41.580 ± 0.034M/s
arr-inc:   39.592 ± 0.055M/s
hash-inc   :   25.873 ± 0.012M/s

After Change:
glob-arr-inc   :   42.024 ± 0.049M/s
arr-inc:   55.447 ± 0.031M/s
hash-inc   :   26.565 ± 0.014M/s

[1] https://github.com/anakryiko/linux/commit/8dec900975ef

Signed-off-by: Saket Kumar Bhaskar 
---
 arch/powerpc/net/bpf_jit_comp.c   | 5 +
 arch/powerpc/net/bpf_jit_comp64.c | 8 
 2 files changed, 13 insertions(+)

diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 2991bb171a9b..3d4bd45a9a22 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -440,6 +440,11 @@ bool bpf_jit_supports_far_kfunc_call(void)
return IS_ENABLED(CONFIG_PPC64);
 }
 
+bool bpf_jit_supports_percpu_insn(void)
+{
+   return true;
+}
+
 void *arch_alloc_bpf_trampoline(unsigned int size)
 {
return bpf_prog_pack_alloc(size, bpf_jit_fill_ill_insns);
diff --git a/arch/powerpc/net/bpf_jit_comp64.c 
b/arch/powerpc/net/bpf_jit_comp64.c
index 233703b06d7c..06f06770ceea 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -679,6 +679,14 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, 
u32 *fimage, struct code
 */
case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
+   if (insn_is_mov_percpu_addr(&insn[i])) {
+   if (dst_reg != src_reg)
+   EMIT(PPC_RAW_MR(dst_reg, src_reg));
+#ifdef CONFIG_SMP
+   EMIT(PPC_RAW_LD(tmp1_reg, _R13, offsetof(struct 
paca_struct, data_offset)));
+   EMIT(PPC_RAW_ADD(dst_reg, dst_reg, tmp1_reg));
+#endif
+   }
if (imm == 1) {
/* special mov32 for zext */
EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 
31));
-- 
2.43.5

[PATCH 0/2] bpf: Inline helper in powerpc JIT

2025-03-11 Thread Saket Kumar Bhaskar

This series adds the support of internal only per-CPU instructions
and inlines the bpf_get_smp_processor_id() helper call for powerpc
BPF JIT.


Saket Kumar Bhaskar (2):
  powerpc, bpf: Support internal-only MOV instruction to resolve per-CPU
addrs
  powerpc, bpf: Inline bpf_get_smp_processor_id()

 arch/powerpc/net/bpf_jit_comp.c   | 15 +++
 arch/powerpc/net/bpf_jit_comp64.c | 13 +
 2 files changed, 28 insertions(+)

-- 
2.43.5

[PATCH v7 0/7] Add character devices for indices, platform-dump and physical-attestation RTAS

2025-03-11 Thread Haren Myneni

Several APIs such as rtas_get_indices(), rtas_get_dynamic_sensor(),
rtas_set_dynamic_indicator(), rtas_platform_dump() and
rtas_physical_attestation()  provided by librtas library are
implemented in user space using rtas syscall in combination with
writable mappings of /dev/mem. But this implementation is not
compatible with system lockdown which prohibits /dev/mem access.
The current kernel already provides char based driver interfaces
for several RTAS calls such as VPD and system parameters to
support lockdown feature.

This patch series adds new char based drivers, /dev/papr-indices
for ibm,get-indices, ibm,get-dynamic-sensor-state and
ibm,set-dynamic-indicator RTAS Calls. /dev/papr-platform-dump
for ibm,platform-dump and /dev/papr-physical-attestation
for ibm,physical-attestation. Providing the similar
open/ioctl/read interfaces to the user space as in the case of
VPD and system parameters.

I have made changes to librtas library to use the new kernel
interfaces if the corresponding device entry is available.

This patch series has the following patches:
powerpc/pseries: Define common functions for RTAS sequence calls
- For some of sequence based RTAS calls, the OS should not start
  another sequence with different input until the previous sequence
  is completed. So the sequence should be completed during ioctl()
  and expose the entire buffer during read(). ibm,get-indices is
  sequence based RTAS function similar to ibm,get-vpd and we already
  have the corresponding implementation for VPD driver. So update
  papr_rtas_sequence struct for RTAS call specific functions and move
  the top level sequence functions in to a separate file.

powerpc/pseries: Define papr_indices_io_block for papr-indices ioctls
- /dev/papr-indices driver supports ibm,get-indices,
  ibm,get-dynamic-sensor-state and ibm,set-dynamic-indicator RTAS Calls.
  papr-indices.h introduces 3 different ioctls for these RTAS calls and
  the corresponding ioctl input buffer.

powerpc/pseries: Add papr-indices char driver for ibm,get-indices
- Introduce /dev/papr-indices char based driver and add support for
  get-indices RTAS function

powerpc/pseries: Add ibm,set-dynamic-indicator RTAS call support
- Update /dev/papr-indices for set-dynamic-indicator RTAS function

powerpc/pseries: Add ibm,get-dynamic-sensor-state RTAS call support
-  Update /dev/papr-indices for  get-dynamic-sensor-state RTAS function

powerpc/pseries: Add papr-platform-dump character driver for dump
   retrieval
- Introduce /dev/papr-platform-dump char driver and adds support for
  ibm,platform-dump. Received suggestions from the previous post as a
  separate patch - Updated the patch with invalidating the dump using
  a separate ioctl.

powerpc/pseries: Add a char driver for papr-physical-attestation RTAS
- Introduce /dev/papr-physical-attestation char driver to provide
  kernel interface for ibm,physical-attestation RTAS function.

Changelog:
v7:
- Pass the proper next value to the subsequent RTAS calls for the
  get-indices sequence RTAS. 
  (Vasireddy Sathvika found this bug).
 
v6:
- Define the proper command ID for PAPR_PHY_ATTEST_IOC_HANDLE ioctl
- Update ioctls description in ioctl-number.rst.

v5:
- Return with -EINPROGRESS in papr_platform_dump_invalidate_ioctl()
  if the complete dump is not read (Suggested by Michal Suchánek).

v4:
- Include patch "Add char driver for papr-physical-attestation RTAS"
  in this series. ibm,physical-attestation is sequence based RTAS
  call and the implementation is also similar to ibm,get-vpd and
  ibm,get-indices.

v3:
- put_unused_fd() only after get_unused_fd() successful for the failure
  case later ("Add papr-platform-dump character driver for dump
  retrieval" patch).

v2:
- Added unlock rtas_ibm_set_dynamic_indicator_lock and
  rtas_ibm_get_dynamic_sensor_state_lock mutex for failure cases
  as reported by Dan Carpenter
- Fixed build warnings for the proper function parameter descriptions
  as reported by kernel test robot 

Haren Myneni (7):
  powerpc/pseries: Define common functions for RTAS sequence calls
  powerpc/pseries: Define papr_indices_io_block for papr-indices ioctls
  powerpc/pseries: Add papr-indices char driver for ibm,get-indices
  powerpc/pseries: Add ibm,set-dynamic-indicator RTAS call support
  powerpc/pseries: Add ibm,get-dynamic-sensor-state RTAS call support
  powerpc/pseries: Add papr-platform-dump character driver for dump
retrieval
  powerpc/pseries: Add a char driver for physical-attestation RTAS

 .../userspace-api/ioctl/ioctl-number.rst  |   6 +
 arch/powerpc/include/asm/rtas.h   |   4 +
 arch/powerpc/include/uapi/asm/papr-indices.h  |  41 ++
 .../uapi/asm/papr-physical-attestation.h  |  31 ++
 .../include/uapi/asm/papr-platform-dump.h |  15 +
 arch/powerpc/kernel/rtas.c|   8 +-
 arch/powerpc/platforms/pseries/Makefile   |   3 +-
 arch/powerpc/platforms/pseries/papr-indices.c | 489 ++
 .../platforms/pseries/papr-phy-attest.c

Re: [PATCH v5 1/3] printf: convert self-test to KUnit

2025-03-11 Thread Tamir Duberstein

On Thu, Mar 6, 2025 at 7:25 AM Petr Mladek  wrote:
>
> On Fri 2025-02-21 15:34:30, Tamir Duberstein wrote:
> > Convert the printf() self-test to a KUnit test.
> >
> > In the interest of keeping the patch reasonably-sized this doesn't
> > refactor the tests into proper parameterized tests - it's all one big
> > test case.
> >
> > Signed-off-by: Tamir Duberstein 
> > ---
> >  Documentation/core-api/printk-formats.rst   |   4 +-
> >  MAINTAINERS |   2 +-
> >  lib/Kconfig.debug   |  12 +-
> >  lib/Makefile|   1 -
> >  lib/tests/Makefile  |   1 +
> >  lib/{test_printf.c => tests/printf_kunit.c} | 188 
> > +++-
> >  tools/testing/selftests/lib/config  |   1 -
> >  tools/testing/selftests/lib/printf.sh   |   4 -
> >  8 files changed, 117 insertions(+), 96 deletions(-)
> >
> > diff --git a/Documentation/core-api/printk-formats.rst 
> > b/Documentation/core-api/printk-formats.rst
> > index e0473da9..4bdc394e86af 100644
> > --- a/Documentation/core-api/printk-formats.rst
> > +++ b/Documentation/core-api/printk-formats.rst
> > @@ -661,7 +661,7 @@ Do *not* use it from C.
> >  Thanks
> >  ==
> >
> > -If you add other %p extensions, please extend  with
> > -one or more test cases, if at all feasible.
> > +If you add other %p extensions, please extend 
> > +with one or more test cases, if at all feasible.
> >
> >  Thank you for your cooperation and attention.
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index f076360ce3c6..b051ccf6b276 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -25510,8 +25510,8 @@ R:Sergey Senozhatsky 
> >  S:   Maintained
> >  T:   git git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux.git
> >  F:   Documentation/core-api/printk-formats.rst
> > -F:   lib/test_printf.c
> >  F:   lib/test_scanf.c
> > +F:   lib/tests/printf_kunit.c
> >  F:   lib/vsprintf.c
> >
> >  VT1211 HARDWARE MONITOR DRIVER
> > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> > index 7ddbfdacf895..d2b15f633227 100644
> > --- a/lib/Kconfig.debug
> > +++ b/lib/Kconfig.debug
> > @@ -2436,6 +2436,15 @@ config ASYNC_RAID6_TEST
> >  config TEST_HEXDUMP
> >   tristate "Test functions located in the hexdump module at runtime"
> >
> > +config PRINTF_KUNIT_TEST
> > + tristate "KUnit test printf() family of functions at runtime" if 
> > !KUNIT_ALL_TESTS
> > + depends on KUNIT
> > + default KUNIT_ALL_TESTS
> > + help
> > +   Enable this option to test the printf functions at runtime.
> > +
> > +   If unsure, say N.
> > +
> >  config STRING_KUNIT_TEST
> >   tristate "KUnit test string functions at runtime" if !KUNIT_ALL_TESTS
> >   depends on KUNIT
> > @@ -2449,9 +2458,6 @@ config STRING_HELPERS_KUNIT_TEST
> >  config TEST_KSTRTOX
> >   tristate "Test kstrto*() family of functions at runtime"
> >
> > -config TEST_PRINTF
> > - tristate "Test printf() family of functions at runtime"
> > -
> >  config TEST_SCANF
> >   tristate "Test scanf() family of functions at runtime"
> >
> > diff --git a/lib/Makefile b/lib/Makefile
> > index 961aef91d493..f31e6a3100ba 100644
> > --- a/lib/Makefile
> > +++ b/lib/Makefile
> > @@ -77,7 +77,6 @@ obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o
> >  obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o
> >  obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
> >  obj-$(CONFIG_TEST_DYNAMIC_DEBUG) += test_dynamic_debug.o
> > -obj-$(CONFIG_TEST_PRINTF) += test_printf.o
> >  obj-$(CONFIG_TEST_SCANF) += test_scanf.o
> >
> >  obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
> > diff --git a/lib/tests/Makefile b/lib/tests/Makefile
> > index 8961fbcff7a4..183c6a838a5d 100644
> > --- a/lib/tests/Makefile
> > +++ b/lib/tests/Makefile
> > @@ -30,6 +30,7 @@ obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o
> >  obj-$(CONFIG_MEMCPY_KUNIT_TEST) += memcpy_kunit.o
> >  CFLAGS_overflow_kunit.o = $(call cc-disable-warning, 
> > tautological-constant-out-of-range-compare)
> >  obj-$(CONFIG_OVERFLOW_KUNIT_TEST) += overflow_kunit.o
> > +obj-$(CONFIG_PRINTF_KUNIT_TEST) += printf_kunit.o
> >  obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o
> >  obj-$(CONFIG_SLUB_KUNIT_TEST) += slub_kunit.o
> >  obj-$(CONFIG_TEST_SORT) += test_sort.o
> > diff --git a/lib/test_printf.c b/lib/tests/printf_kunit.c
> > similarity index 87%
> > rename from lib/test_printf.c
> > rename to lib/tests/printf_kunit.c
> > index 59dbe4f9a4cb..287bbfb61148 100644
> > --- a/lib/test_printf.c
> > +++ b/lib/tests/printf_kunit.c
> > @@ -3,9 +3,7 @@
> >   * Test cases for printf facility.
> >   */
> >
> > -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> > -
> > -#include 
> > +#include 
> >  #include 
> >  #include 
> >  #include 
> > @@ -25,8 +23,6 @@
> >
> >  #include 
> >
> > -#include "../tools/testing/selftests/kselftest_module.h"
> > -
> >  #define BUF_SIZE 256
> >  #define PAD_SIZE 16
> >  #define FILL_CHAR '$'
> > @@ -37,12

Re: [PATCH v4 4/6] kvm powerpc/book3s-apiv2: Introduce kvm-hv specific PMU

2025-03-11 Thread Athira Rajeev




> On 10 Mar 2025, at 12:42 PM, Vaibhav Jain  wrote:
> 
> Athira Rajeev  writes:
> 
>>> On 24 Feb 2025, at 6:45 PM, Vaibhav Jain  wrote:
>>> 
>>> Introduce a new PMU named 'kvm-hv' inside a new module named 'kvm-hv-pmu'
>>> to report Book3s kvm-hv specific performance counters. This will expose
>>> KVM-HV specific performance attributes to user-space via kernel's PMU
>>> infrastructure and would enable users to monitor active kvm-hv based guests.
>>> 
>>> The patch creates necessary scaffolding for the new PMU callbacks and
>>> introduces the new kernel module name 'kvm-hv-pmu' which is built with
>>> CONFIG_KVM_BOOK3S_HV_PMU. The patch doesn't introduce any perf-events yet,
>>> which will be introduced in later patches
>>> 
>>> Signed-off-by: Vaibhav Jain 
>>> 
>>> ---
>>> Changelog
>>> 
>>> v3->v4:
>>> * Introduced a new kernel module named 'kvm-hv-pmu' to host the new PMU
>>> instead of building it as part of the KVM-HV module. [ Maddy ]
>>> * Moved the code from arch/powerpc/kvm to arch/powerpc/perf [ Atheera ]
>>> * Added a new config named KVM_BOOK3S_HV_PMU to arch/powerpc/kvm/Kconfig
>>> 
>>> v2->v3:
>>> * Fixed a build warning reported by kernel build robot.
>>> Link:
>>> https://lore.kernel.org/oe-kbuild-all/202501171030.3x0gqw8g-...@intel.com
>>> 
>>> v1->v2:
>>> * Fixed an issue of kvm-hv not loading on baremetal kvm [Gautam]
>>> ---
>>> arch/powerpc/kvm/Kconfig   |  13 
>>> arch/powerpc/perf/Makefile |   2 +
>>> arch/powerpc/perf/kvm-hv-pmu.c | 138 +
>>> 3 files changed, 153 insertions(+)
>>> create mode 100644 arch/powerpc/perf/kvm-hv-pmu.c
>>> 
>>> diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
>>> index dbfdc126bf14..5f0ce19e7e27 100644
>>> --- a/arch/powerpc/kvm/Kconfig
>>> +++ b/arch/powerpc/kvm/Kconfig
>>> @@ -83,6 +83,7 @@ config KVM_BOOK3S_64_HV
>>> depends on KVM_BOOK3S_64 && PPC_POWERNV
>>> select KVM_BOOK3S_HV_POSSIBLE
>>> select KVM_GENERIC_MMU_NOTIFIER
>>> + select KVM_BOOK3S_HV_PMU
>>> select CMA
>>> help
>>> Support running unmodified book3s_64 guest kernels in
>>> @@ -171,6 +172,18 @@ config KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND
>>> those buggy L1s which saves the L2 state, at the cost of performance
>>> in all nested-capable guest entry/exit.
>>> 
>>> +config KVM_BOOK3S_HV_PMU
>>> + tristate "Hypervisor Perf events for KVM Book3s-HV"
>>> + depends on KVM_BOOK3S_64_HV && HV_PERF_CTRS
>>> + help
>>> +  Enable Book3s-HV Hypervisor Perf events PMU named 'kvm-hv'. These
>>> +  Perf events give an overview of hypervisor performance overall
>>> +  instead of a specific guests. Currently the PMU reports
>>> +  L0-Hypervisor stats on a kvm-hv enabled PSeries LPAR like:
>>> +  * Total/Used Guest-Heap
>>> +  * Total/Used Guest Page-table Memory
>>> +  * Total amount of Guest Page-table Memory reclaimed
>>> +
>>> config KVM_BOOKE_HV
>>> bool
>>> 
>>> diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
>>> index ac2cf58d62db..7f53fcb7495a 100644
>>> --- a/arch/powerpc/perf/Makefile
>>> +++ b/arch/powerpc/perf/Makefile
>>> @@ -18,6 +18,8 @@ obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o 
>>> hv-common.o
>>> 
>>> obj-$(CONFIG_VPA_PMU) += vpa-pmu.o
>>> 
>>> +obj-$(CONFIG_KVM_BOOK3S_HV_PMU) += kvm-hv-pmu.o
>>> +
>>> obj-$(CONFIG_PPC_8xx) += 8xx-pmu.o
>>> 
>>> obj-$(CONFIG_PPC64) += $(obj64-y)
>>> diff --git a/arch/powerpc/perf/kvm-hv-pmu.c b/arch/powerpc/perf/kvm-hv-pmu.c
>>> new file mode 100644
>>> index ..c154f54e09e2
>>> --- /dev/null
>>> +++ b/arch/powerpc/perf/kvm-hv-pmu.c
>>> @@ -0,0 +1,138 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +/*
>>> + * Description: PMUs specific to running nested KVM-HV guests
>>> + * on Book3S processors (specifically POWER9 and later).
>>> + */
>>> +
>>> +#define pr_fmt(fmt)  "kvmppc-pmu: " fmt
>>> +
>>> +#include "asm-generic/local64.h"
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +#include 
>>> +
>>> +enum kvmppc_pmu_eventid {
>>> + KVMPPC_EVENT_MAX,
>>> +};
>>> +
>>> +static struct attribute *kvmppc_pmu_events_attr[] = {
>>> + NULL,
>>> +};
>>> +
>>> +static const struct attribute_group kvmppc_pmu_events_group = {
>>> + .name = "events",
>>> + .attrs = kvmppc_pmu_events_attr,
>>> +};
>>> +
>>> +PMU_FORMAT_ATTR(event, "config:0");
>>> +static struct attribute *kvmppc_pmu_format_attr[] = {
>>> + &format_attr_event.attr,
>>> + NULL,
>>> +};
>>> +
>>> +static struct attribute_group kvmppc_pmu_format_group = {
>>> + .name = "format",
>>> + .attrs = kvmppc_pmu_format_attr,
>>> +};
>>> +
>>> +static const struct attribute_group *kvmppc_pmu_attr_groups[] = {
>>> + &kvmppc_pmu_events_group,
>>> + &kvmppc_pmu_format_group,
>>> + NULL,
>>> +};
>>> +
>>> +static int kvmppc_pmu_event_init(struct perf_event *event)
>>> +{
>>> + unsigned int config = event->at

Re: [PATCH v3 1/2] book3s64/radix: Fix compile errors when CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP=n

2025-03-11 Thread Christophe Leroy





Le 10/03/2025 à 13:44, Donet Tom a écrit :

From: "Ritesh Harjani (IBM)" 

Fix compile errors when CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP=n


I don't understand your patch.

As far as I can see, CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP is selected 
when CONFIG_PPC_RADIX_MMU is selected, and radix_pgtable.o is built only 
when CONFIG_PPC_RADIX_MMU is selected. So when radix_pgtable.o is built 
CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP will always be selected.


Can you clarify what the problem is ?

Christophe



Signed-off-by: Ritesh Harjani (IBM) 
Signed-off-by: Donet Tom 
---
  arch/powerpc/mm/book3s64/radix_pgtable.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c 
b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 311e2112d782..bd6916419472 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -976,7 +976,7 @@ int __meminit radix__vmemmap_create_mapping(unsigned long 
start,
return 0;
  }
  
-

+#ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP
  bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap 
*pgmap)
  {
if (radix_enabled())
@@ -984,6 +984,7 @@ bool vmemmap_can_optimize(struct vmem_altmap *altmap, 
struct dev_pagemap *pgmap)
  
  	return false;

  }
+#endif
  
  int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node,

unsigned long addr, unsigned long next)

Re: [PATCH v12 0/4] Support SMT control on arm64

2025-03-11 Thread Sudeep Holla

On Tue, Mar 11, 2025 at 03:51:39PM +0800, Yicong Yang wrote:
> From: Yicong Yang 
> 
> The core CPU control framework supports runtime SMT control which
> is not yet supported on arm64. Besides the general vulnerabilities
> concerns we want this runtime control on our arm64 server for:
> 
> - better single CPU performance in some cases
> - saving overall power consumption
> 
> This patchset implements it in the following aspects:
> 
> - Provides a default topology_is_primary_thread()
> - support retrieve SMT thread number on OF based system
> - support retrieve SMT thread number on ACPI based system
> - select HOTPLUG_SMT for arm64
> 
> Tests have been done on our ACPI based arm64 server and on ACPI/OF
> based QEMU VMs.
> 
> Change since v11:
> - Remove the check and warning for heterogeneous platform as suggested and 
> discussed

IIUC, the ask was not to remove it completely but to allow single
threaded and same number of threads in all the multi-threaded CPUs.
Anyways that is not a must, we can just stash max_smt_thread_num value
before updating with max and see if it changes from one value to another
with value != 1. It can be done later when we want to warn such systems
if they appear in the future and we can't support them. For now, it looks
fine.

-- 
Regards,
Sudeep

Re: [PATCH v12 1/4] cpu/SMT: Provide a default topology_is_primary_thread()

2025-03-11 Thread Sudeep Holla

On Tue, Mar 11, 2025 at 03:51:40PM +0800, Yicong Yang wrote:
> From: Yicong Yang 
> 
> Currently if architectures want to support HOTPLUG_SMT they need to
> provide a topology_is_primary_thread() telling the framework which
> thread in the SMT cannot offline. However arm64 doesn't have a
> restriction on which thread in the SMT cannot offline, a simplest
> choice is that just make 1st thread as the "primary" thread. So
> just make this as the default implementation in the framework and
> let architectures like x86 that have special primary thread to
> override this function (which they've already done).
> 
> There's no need to provide a stub function if !CONFIG_SMP or
> !CONFIG_HOTPLUG_SMT. In such case the testing CPU is already
> the 1st CPU in the SMT so it's always the primary thread.
>

LGTM:

Reviewed-by: Sudeep Holla 

> +  * enabled/active. Architectures do have a special primary thread

If you respin

^^ s/do/that/ or s/do/that do/

-- 
Regards,
Sudeep

Re: [PATCH v12 4/4] arm64: Kconfig: Enable HOTPLUG_SMT

2025-03-11 Thread Sudeep Holla

On Tue, Mar 11, 2025 at 03:51:43PM +0800, Yicong Yang wrote:
> From: Yicong Yang 
> 
> Enable HOTPLUG_SMT for SMT control.
> 

Reviewed-by: Sudeep Holla 

-- 
Regards,
Sudeep

Re: [PATCH v12 2/4] arch_topology: Support SMT control for OF based system

2025-03-11 Thread Sudeep Holla

On Tue, Mar 11, 2025 at 03:51:41PM +0800, Yicong Yang wrote:
> From: Yicong Yang 
> 
> On building the topology from the devicetree, we've already gotten the
> SMT thread number of each core. Update the largest SMT thread number
> and enable the SMT control by the end of topology parsing.
> 
> The framework's SMT control provides two interfaces to the users [1]
> through /sys/devices/system/cpu/smt/control:
> 1) enable SMT by writing "on" and disable by "off"
> 2) enable SMT by writing max_thread_number or disable by writing 1
> 
> Both methods support completely disabling/enabling the SMT cores, so both
> work correctly for symmetric SMT platform and asymmetric platform with
> non-SMT and one type SMT cores like:
> core A: 1 thread
> core B: X (X!=1) threads
> 
> Note that for a theoretically possible multiple SMT-X (X>1) core
> platform the SMT control is also supported as expected but only
> by writing the "on/off" method.
> 
> [1] 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/ABI/testing/sysfs-devices-system-cpu#n542

Just the path must suffice here, no need for URL.

LGTM otherwise, much simpler now:

Reviewed-by: Sudeep Holla 

-- 
Regards,
Sudeep

Re: [PATCH v12 3/4] arm64: topology: Support SMT control on ACPI based system

2025-03-11 Thread Sudeep Holla

On Tue, Mar 11, 2025 at 03:51:42PM +0800, Yicong Yang wrote:
> From: Yicong Yang 
> 
> For ACPI we'll build the topology from PPTT and we cannot directly
> get the SMT number of each core. Instead using a temporary xarray
> to record the heterogeneous information (from ACPI_PPTT_ACPI_IDENTICAL)
> and SMT information of the first core in its heterogeneous CPU cluster
> when building the topology. Then we can know the largest SMT number
> in the system. If a homogeneous system's using ACPI 6.2 or later,
> all the CPUs should be under the root node of PPTT. There'll be
> only one entry in the xarray and all the CPUs in the system will
> be assumed identical.
> 
> The framework's SMT control provides two interfaces to the users [1]
> through /sys/devices/system/cpu/smt/control:
> 1) enable SMT by writing "on" and disable by "off"
> 2) enable SMT by writing max_thread_number or disable by writing 1
> 
> Both methods support completely disabling/enabling the SMT cores, so both
> work correctly for symmetric SMT platform and asymmetric platform with
> non-SMT and one type SMT cores like:
> core A: 1 thread
> core B: X (X!=1) threads
> 
> Note that for a theoretically possible multiple SMT-X (X>1) core
> platform the SMT control is also supported as expected but only
> by writing the "on/off" method.
> 
> [1] 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/ABI/testing/sysfs-devices-system-cpu#n542

Ditto, just path please.

Reviewed-by: Sudeep Holla 

--
Regards,
Sudeep

[PATCH 5.10 093/462] module: Extend the preempt disabled section in dereference_symbol_descriptor().

2025-03-11 Thread Greg Kroah-Hartman

5.10-stable review patch.  If anyone has any objections, please let me know.

--

From: Sebastian Andrzej Siewior 

[ Upstream commit a145c848d69f9c6f32008d8319edaa133360dd74 ]

dereference_symbol_descriptor() needs to obtain the module pointer
belonging to pointer in order to resolve that pointer.
The returned mod pointer is obtained under RCU-sched/ preempt_disable()
guarantees and needs to be used within this section to ensure that the
module is not removed in the meantime.

Extend the preempt_disable() section to also cover
dereference_module_function_descriptor().

Fixes: 04b8eb7a4ccd9 ("symbol lookup: introduce 
dereference_symbol_descriptor()")
Cc: James E.J. Bottomley 
Cc: Christophe Leroy 
Cc: Helge Deller 
Cc: Madhavan Srinivasan 
Cc: Michael Ellerman 
Cc: Naveen N Rao 
Cc: Nicholas Piggin 
Cc: Sergey Senozhatsky 
Cc: linux-par...@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Reviewed-by: Sergey Senozhatsky 
Acked-by: Peter Zijlstra (Intel) 
Signed-off-by: Sebastian Andrzej Siewior 
Link: https://lore.kernel.org/r/20250108090457.512198-2-bige...@linutronix.de
Signed-off-by: Petr Pavlu 
Signed-off-by: Sasha Levin 
---
 include/linux/kallsyms.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 430f1cefbb9e1..ea2eb5fe83a3c 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -63,10 +63,10 @@ static inline void *dereference_symbol_descriptor(void *ptr)
 
preempt_disable();
mod = __module_address((unsigned long)ptr);
-   preempt_enable();
 
if (mod)
ptr = dereference_module_function_descriptor(mod, ptr);
+   preempt_enable();
 #endif
return ptr;
 }
-- 
2.39.5

[RFC 1/2] mm/ptdump: Split note_page() into level specific callbacks

2025-03-11 Thread Anshuman Khandual

Last argument passed down in note_page() is u64 assuming pxd_val() returned
value (all page table levels) is 64 bit - which might not be the case going
ahead when D128 page tables are enabled on the arm64 platform. Besides, pxd_val()
is very platform specific and its type should not be assumed in generic MM.

Split note_page() into individual page table level specific callbacks which
accept the corresponding pxd_t argument instead; subscribing platforms then
just derive pxd_val() from the entries as required and proceed as before.

Cc: Catalin Marinas 
Cc: Will Deacon 
Cc: Madhavan Srinivasan 
Cc: Nicholas Piggin 
Cc: Paul Walmsley 
Cc: Palmer Dabbelt 
Cc: Gerald Schaefer 
Cc: Heiko Carstens 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: Andrew Morton 
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-ker...@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-ri...@lists.infradead.org
Cc: linux-s...@vger.kernel.org
Cc: linux...@kvack.org
Signed-off-by: Anshuman Khandual 
---
 arch/arm64/include/asm/ptdump.h | 10 
 arch/arm64/mm/ptdump.c  | 37 ++--
 arch/powerpc/mm/ptdump/ptdump.c | 37 ++--
 arch/riscv/mm/ptdump.c  | 37 ++--
 arch/s390/mm/dump_pagetables.c  | 37 ++--
 arch/x86/mm/dump_pagetables.c   | 31 +++-
 include/linux/ptdump.h  |  7 --
 mm/ptdump.c | 43 +
 8 files changed, 218 insertions(+), 21 deletions(-)

diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index 6cf4aae05219..8baba0d1aa8f 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -60,6 +60,11 @@ struct ptdump_pg_state {
 void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
 void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
   u64 val);
+void note_page_pte(struct ptdump_state *st, unsigned long addr, pte_t pte);
+void note_page_pmd(struct ptdump_state *st, unsigned long addr, pmd_t pmd);
+void note_page_pud(struct ptdump_state *st, unsigned long addr, pud_t pud);
+void note_page_p4d(struct ptdump_state *st, unsigned long addr, p4d_t p4d);
+void note_page_pgd(struct ptdump_state *st, unsigned long addr, pgd_t pgd);
 #ifdef CONFIG_PTDUMP_DEBUGFS
 #define EFI_RUNTIME_MAP_ENDDEFAULT_MAP_WINDOW_64
 void __init ptdump_debugfs_register(struct ptdump_info *info, const char 
*name);
@@ -70,6 +75,11 @@ static inline void ptdump_debugfs_register(struct 
ptdump_info *info,
 #else
 static inline void note_page(struct ptdump_state *pt_st, unsigned long addr,
 int level, u64 val) { }
+static inline void note_page_pte(struct ptdump_state *st, unsigned long addr, 
pte_t pte) { }
+static inline void note_page_pmd(struct ptdump_state *st, unsigned long addr, 
pmd_t pmd) { }
+static inline void note_page_pud(struct ptdump_state *st, unsigned long addr, 
pud_t pud) { }
+static inline void note_page_p4d(struct ptdump_state *st, unsigned long addr, 
p4d_t p4d) { }
+static inline void note_page_pgd(struct ptdump_state *st, unsigned long addr, 
pgd_t pgd) { }
 #endif /* CONFIG_PTDUMP_CORE */
 
 #endif /* __ASM_PTDUMP_H */
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index 688fbe0271ca..0c66c8474a48 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -251,6 +251,31 @@ void note_page(struct ptdump_state *pt_st, unsigned long 
addr, int level,
 
 }
 
+void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte)
+{
+   note_page(pt_st, addr, 4, pte_val(pte));
+}
+
+void note_page_pmd(struct ptdump_state *pt_st, unsigned long addr, pmd_t pmd)
+{
+   note_page(pt_st, addr, 3, pmd_val(pmd));
+}
+
+void note_page_pud(struct ptdump_state *pt_st, unsigned long addr, pud_t pud)
+{
+   note_page(pt_st, addr, 2, pud_val(pud));
+}
+
+void note_page_p4d(struct ptdump_state *pt_st, unsigned long addr, p4d_t p4d)
+{
+   note_page(pt_st, addr, 1, p4d_val(p4d));
+}
+
+void note_page_pgd(struct ptdump_state *pt_st, unsigned long addr, pgd_t pgd)
+{
+   note_page(pt_st, addr, 0, pgd_val(pgd));
+}
+
 void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
 {
unsigned long end = ~0UL;
@@ -266,7 +291,11 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info 
*info)
.pg_level = &kernel_pg_levels[0],
.level = -1,
.ptdump = {
-   .note_page = note_page,
+   .note_page_pte = note_page_pte,
+   .note_page_pmd = note_page_pmd,
+   .note_page_pud = note_page_pud,
+   .note_page_p4d = note_page_p4d,
+   .note_page_pgd = note_page_pgd,
.range = (struct ptdump_range[]){
{info->base_addr, end},
{0, 0}
@@

Re: [PATCH v4 net-next 01/14] net: enetc: add initial netc-lib driver to support NTMP

2025-03-11 Thread Michal Kubiak

On Tue, Mar 11, 2025 at 01:38:17PM +0800, Wei Fang wrote:
> Some NETC functionality is controlled using control messages sent to the
> hardware using BD ring interface with 32B descriptor similar to transmit
> BD ring used on ENETC. This BD ring interface is referred to as command
> BD ring. It is used to configure functionality where the underlying
> resources may be shared between different entities or being too large to
> configure using direct registers. Therefore, a messaging protocol called
> NETC Table Management Protocol (NTMP) is provided for exchanging
> configuration and management information between the software and the
> hardware using the command BD ring interface.
> 
> For i.MX95, NTMP has been upgraded to version 2.0, which is incompatible
> with LS1028A, because the message formats have been changed. Therefore,
> add the netc-lib driver to support NTMP 2.0 to operate various tables.
> Note that, only MAC address filter table and RSS table are supported at
> the moment. More tables will be supported in subsequent patches.
> 
> It is worth mentioning that the purpose of the netc-lib driver is to
> provide some NTMP-based generic interfaces for ENETC and NETC Switch
> drivers. Currently, it only supports the configurations of some tables.
> Interfaces such as tc flower and debugfs will be added in the future.
> 
> Signed-off-by: Wei Fang 

The patch looks OK to me.

Thanks,
Reviewed-by: Michal Kubiak

Re: [PATCH v3 net-next 04/13] net: enetc: add MAC filter for i.MX95 ENETC PF

2025-03-11 Thread Paolo Abeni

On 3/4/25 8:21 AM, Wei Fang wrote:
> +static void enetc_mac_list_del_matched_entries(struct enetc_pf *pf, u16 
> si_bit,
> +struct enetc_mac_addr *mac,
> +int mac_cnt)
> +{
> + struct enetc_mac_list_entry *entry;
> + int i;
> +
> + for (i = 0; i < mac_cnt; i++) {
> + entry = enetc_mac_list_lookup_entry(pf, mac[i].addr);
> + if (entry) {
> + entry->si_bitmap &= ~si_bit;
> + if (!entry->si_bitmap) {


Minor nit: here and elsewhere you could reduce the level of indentation
by restructuring the code as:

if (!entry)
continue;

entry->si_bitmap &= ~si_bit;
if (entry->si_bitmap)
continue;
/P

Low-level details of secondary CPU bringup on mac99?

2025-03-11 Thread Andrew Randrianasulu

Hello, I see this list is mostly for Linux kernel patches, but we are trying
to get two-processor virtual mac99 machine going in qemu, and sadly it does
not work without singlestepping on boot cpu in

smp_core99_kick_cpu()

https://elixir.bootlin.com/linux/v6.12.17/source/arch/powerpc/platforms/powermac/smp.c#L796

just single stepping after switching to second thread/vCPU  ends with my
naive gdb script erroring out ;)

Last email in chain on qemu-ppc list:

https://lists.gnu.org/archive/html/qemu-ppc/2025-03/msg00136.html

latest gdb log attached to

https://lists.gnu.org/archive/html/qemu-ppc/2025-03/msg00132.html

MacOS X 10.4.11 strangely works out of the box with two CPUs . Users
reported even macos 9 (with selected applications like Quicktime) works!

I can't test NetBSD/macppc due to some trouble in my crosscompiler? From
netbsd/amd64 machine using standard NetBSD build.sh script. So I put
NetBSD aside for now.

I use 'remote' gdb running in Debian ppc sid/trixie VM on the same
NetBSD/amd64 host.

Linux kernel 6.12.17 was compiled under emulation with localmodconfig where
I changed ext4 and SCSI disk drivers to be builtin (for faster bootup
without initrd). I disabled CPU_HOTPLUG after disabling sleep/hibernation
support in hope it will help booting second cpu. But I was wrong about
this. Recompiling might take 30 hours or so I prefer to reuse kernel I have
;) I see mac ppc SMP code was not changed much since at least 2.6.24 times
of Gentoo 2008.0 so I hope it works on real machines!

I might have spotted a typo in a comment

https://elixir.bootlin.com/linux/v6.12.17/source/arch/powerpc/kernel/head_book3s_32.S#L239

"/* core99 pmac starts the seconary here by changing the vector, and

   putting it back to what it was (unknown_async_exception) when done.  */

"


secondary not seconary ?

but apart from this I am mostly clueless how PowerPC works, especially in
those dual G4 machines 

If you need link to deb of this kernel, .config, qemu/openbios patches, gdb
script - just ask!

Not sure if I should crosspost this back to qemu-ppc  list?

Re: [PATCH] selftest/powerpc/mm/pkey: fix build-break introduced by commit 00894c3fc917

2025-03-11 Thread Catalin Marinas

On Tue, 11 Mar 2025 14:11:29 +0530, Madhavan Srinivasan wrote:
> Build break was reported in the powerpc mailing list for next-20250218 with 
> below errors
> 
> make[1]: Nothing to be done for 'all'.
> BUILD_TARGET=/root/venkat/linux-next/tools/testing/selftests/powerpc/mm; 
> mkdir -p $BUILD_TARGET; make OUTPUT=$BUILD_TARGET -k -C mm all
>   CC   pkey_exec_prot
> In file included from pkey_exec_prot.c:18:
> /root/venkat/linux-next/tools/testing/selftests/powerpc/include/pkeys.h: In 
> function ‘pkeys_unsupported’:
> /root/venkat/linux-next/tools/testing/selftests/powerpc/include/pkeys.h:96:34:
>  error: ‘PKEY_UNRESTRICTED’ undeclared (first use in this function)
>96 | pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
>   |  ^
> 
> [...]

Applied to arm64 (for-next/pkey_unrestricted), thanks!

[1/1] selftest/powerpc/mm/pkey: fix build-break introduced by commit 
00894c3fc917
  https://git.kernel.org/arm64/c/73276cee1a25

-- 
Catalin

Re: [PATCH 1/2] powerpc, bpf: Support internal-only MOV instruction to resolve per-CPU addrs

2025-03-11 Thread Christophe Leroy





Le 11/03/2025 à 17:09, Saket Kumar Bhaskar a écrit :

[Vous ne recevez pas souvent de courriers de sk...@linux.ibm.com. Découvrez 
pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]

With the introduction of commit 7bdbf7446305 ("bpf: add special
internal-only MOV instruction to resolve per-CPU addrs"),
a new BPF instruction BPF_MOV64_PERCPU_REG has been added to
resolve absolute addresses of per-CPU data from their per-CPU
offsets. This update requires enabling support for this
instruction in the powerpc JIT compiler.

As of commit 7a0268fa1a36 ("[PATCH] powerpc/64: per cpu data
optimisations"), the per-CPU data offset for the CPU is stored in
the paca.

To support this BPF instruction in the powerpc JIT, the following
powerpc instructions are emitted:

mr dst_reg, src_reg //Move src_reg to dst_reg, if src_reg != dst_reg
ld tmp1_reg, 48(13) //Load per-CPU data offset from paca(r13) in 
tmp1_reg.
add dst_reg, dst_reg, tmp1_reg  //Add the per cpu offset to the dst.


Why not do:

  add dst_reg, src_reg, tmp1_reg

instead of a combination of 'mr' and 'add' ?




To evaluate the performance improvements introduced by this change,
the benchmark described in [1] was employed.

Before Change:
glob-arr-inc   :   41.580 ± 0.034M/s
arr-inc:   39.592 ± 0.055M/s
hash-inc   :   25.873 ± 0.012M/s

After Change:
glob-arr-inc   :   42.024 ± 0.049M/s
arr-inc:   55.447 ± 0.031M/s
hash-inc   :   26.565 ± 0.014M/s

[1] 
https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2Fanakryiko%2Flinux%2Fcommit%2F8dec900975ef&data=05%7C02%7Cchristophe.leroy%40csgroup.eu%7Ca4bc35a9cb49457fb5cc08dd60b73783%7C8b87af7d86474dc78df45f69a2011bb5%7C0%7C0%7C638773062200197453%7CUnknown%7CTWFpbGZsb3d8eyJFbXB0eU1hcGkiOnRydWUsIlYiOiIwLjAuMDAwMCIsIlAiOiJXaW4zMiIsIkFOIjoiTWFpbCIsIldUIjoyfQ%3D%3D%7C0%7C%7C%7C&sdata=1t2Bc3w6Ye0u33UNEjsSAv114HDOGNXmk1I%2Fxt7K2sc%3D&reserved=0

Signed-off-by: Saket Kumar Bhaskar 
---
  arch/powerpc/net/bpf_jit_comp.c   | 5 +
  arch/powerpc/net/bpf_jit_comp64.c | 8 
  2 files changed, 13 insertions(+)

diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 2991bb171a9b..3d4bd45a9a22 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -440,6 +440,11 @@ bool bpf_jit_supports_far_kfunc_call(void)
 return IS_ENABLED(CONFIG_PPC64);
  }

+bool bpf_jit_supports_percpu_insn(void)
+{
+   return true;
+}
+


What about PPC32 ?


  void *arch_alloc_bpf_trampoline(unsigned int size)
  {
 return bpf_prog_pack_alloc(size, bpf_jit_fill_ill_insns);
diff --git a/arch/powerpc/net/bpf_jit_comp64.c 
b/arch/powerpc/net/bpf_jit_comp64.c
index 233703b06d7c..06f06770ceea 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -679,6 +679,14 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, 
u32 *fimage, struct code
  */
 case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
 case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
+   if (insn_is_mov_percpu_addr(&insn[i])) {
+   if (dst_reg != src_reg)
+   EMIT(PPC_RAW_MR(dst_reg, src_reg));


Shouldn't be needed except for the non-SMP case maybe.


+#ifdef CONFIG_SMP
+   EMIT(PPC_RAW_LD(tmp1_reg, _R13, offsetof(struct 
paca_struct, data_offset)));
+   EMIT(PPC_RAW_ADD(dst_reg, dst_reg, tmp1_reg));


Can use src_reg as first operand instead of dst_reg


+#endif


data_offset always exists in paca_struct, please use 
IS_ENABLED(CONFIG_SMP) instead of #ifdef



+   }
 if (imm == 1) {
 /* special mov32 for zext */
 EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 
31));
--
2.43.5

Re: [PATCH 10/13] arch, mm: set high_memory in free_area_init()

2025-03-11 Thread Mark Brown

On Thu, Mar 06, 2025 at 08:51:20PM +0200, Mike Rapoport wrote:
> From: "Mike Rapoport (Microsoft)" 
> 
> high_memory defines upper bound on the directly mapped memory.
> This bound is defined by the beginning of ZONE_HIGHMEM when a system has
> high memory and by the end of memory otherwise.
> 
> All this is known to generic memory management initialization code that
> can set high_memory while initializing core mm structures.
> 
> Remove per-architecture calculation of high_memory and add a generic
> version to free_area_init().

This patch appears to be causing breakage on a number of 32 bit arm
platforms, including qemu's virt-2.11,gic-version=3.  Affected platforms
die on boot with no output, a bisect with qemu points at this commit and
those for physical platforms appear to be converging on the same place.

Bisect log:

# bad: [eea255893718268e1ab852fb52f70c613d109b99] Add linux-next specific files 
for 20250311
# good: [97654dc13f139ea726042711a4943f424c5d5b83] Merge branch 
'for-linux-next-fixes' of https://gitlab.freedesktop.org/drm/misc/kernel.git
# good: [5cd09a324588b4554c9ed89cef34fa502a097d16] fs/proc/task_mmu: reduce 
scope of lazy mmu region
# good: [8a7e7a03e3c53cd9abbbf233899cc2e05b2c6ec0] ASoC: SOF: Intel: Add 
support for ACE3+ mic privacy
# good: [1ec3f1dc215d4b3d3679ecdc4a549d4e82b3a609] ASoC: dmic: add regulator 
support
# good: [69823334200029767de785d30acf74e4872a11d3] ASoC: SOF: Intel: mtl: Split 
up dsp_ops setup code
# good: [db91ad81a2545eb82aa47d0306bc3e1adb05e336] ASoC: dt-bindings: 
fsl,imx-asrc: Document audio graph port
# good: [a8fed0bddf8fa239fc71dc5c035d2e078c597369] ASoC: dt-bindings: add 
regulator support to dmic codec
# good: [0d2d276f53ea3ba1686619cde503d9748f58a834] ASoC: SOF: Intel: lnl/ptl: 
Only set dsp_ops which differs from MTL
# good: [8aeb7d2c3fc315e629d252cd601598a5af74bbb0] ASoC: SOF: Intel: Create 
ptl.c as placeholder for Panther Lake features
# good: [4a43c3241ec3465a501825ecaf051e5a1d85a60b] ASoC: SOF: Intel: ptl: Add 
support for mic privacy
# good: [80416226920c21e806f93bd0930d67557f41600f] ASoC: SOF: Intel: mtl: Stop 
exporting dsp_ops callback functions
# good: [d3321a20b5111a66f3e68798959a347acfccbd44] ASoC: dmic: add regulator 
support
# good: [eea84a7f0cdb693c261a7cf84bd4b3d81479c9a6] ASoC: SOF: ipc4: Add support 
for Intel HW managed mic privacy messaging
# good: [0978e8207b61ac6d51280e5d28ccfff75d653363] ASoC: SOF: Intel: hda-mlink: 
Add support for mic privacy in VS SHIM registers
# good: [a0db661e7d8e084e9cf3b9cdca7c6e4e66f2e849] ASoC: SOF: hda/shim: Add 
callbacks to handle mic privacy change for sdw
# good: [02a838b01b8e7c00e2efe78db06fff356a112dec] spi: dt-bindings: fsl-lpspi: 
Add i.MX94 support
# good: [5d5eceb9bb1050774dadc6919a258729f276fd00] ASoC: soc-dai: add 
snd_soc_dai_mute_is_ctrled_at_trigger()
# good: [3707fd9c383fc7ae19733a3ad2e5a82bf86370a0] spi: stm32: Remove 
unnecessary print function dev_err()
# good: [7a2ff0510c51462c0a979f5006d375a2b23d46e9] ASoC: soc-pcm: reuse 
dpcm_state_string()
# good: [269b844239149a9bbaba66518db99ebb06554a15] ASoC: dapm: Fix changes to 
DECLARE_ADAU17X1_DSP_MUX_CTRL
# good: [7dfc9bdde9fa20cf1ac5cbea97b0446622ca74c7] spi: stm32-ospi: Fix an 
IS_ERR() vs NULL bug in stm32_ospi_get_resources()
# good: [2c2eadd07e747059ccd65e68cd1d1b23ca96b072] ASoC: cs42l43: convert to 
SYSTEM_SLEEP_PM_OPS
# good: [c6141ba0110f98266106699aca071fed025c3d64] ASoC: Merge up fixes
# good: [a1462fb8b5dd1018e3477a6861822d75c6a59449] ASoC: Intel: boards: updates 
for 6.15
# good: [1ff07522690d2c2b67343099d2d046e88f71cddb] ASoC: Intel: 
soc-acpi-intel-lnl-match: add cs42l43 6x cs35l56 support
# good: [ffe450cb6bce16eb15f6bf90b85b7e5f9bfbc1e3] ASoC: Intel: 
soc-acpi-intel-ptl-match: add rt713_vb_l3_rt1320_l12 support
# good: [65e246d33dede0008f281d3d09b7695bef2d18eb] ASoC: sdw_utils: add mic and 
amp dais to 0x codec
# good: [c7a6a74f847923bb726029b85a3fd0e05e9fbb04] ASoC: Intel: 
soc-acpi-intel-ptl-match: add sdw multi function mockup codec
# good: [02467341e3577836648753a9e9a5c196f08187da] ASoC: Intel: 
soc-acpi-intel-ptl-match: add rt712_vb_l3_rt1320_l2 support
# good: [438405704eec45c06be9adc94eb5f94855412790] ASoC: Intel: 
soc-acpi-intel-lnl-match: add sdw multi function mockup codec
# good: [8b36447c9ae102539d82d6278971b23b20d87629] ASoC: Intel: adl: add 
2xrt1316 audio configuration
# good: [e1a0657c6d943528ef58671594ca7e5b17db5394] ASoC: Intel: add 
multi-function SDW mockup codec match
# good: [7172d9ae29afd00c8ee9a8e3a4eba4cea5d5e403] ASoC: Intel: 
soc-acpi-intel-ptl-match: add cs42l43 6x cs35l56 support
# good: [b92bc4d6e21f1802a39975e3c7cc4f76f591d46f] ASoC: soc-pcm: merge 
soc_pcm_hw_update_format/subformat()
# good: [de22dc76e11d1291d4f50b73dbbaa158ba9d6acd] ASoC: doc: use 
SND_SOC_DAILINK_xxx() macro
# good: [6db63090272768785e6bb4a3afa16650c1e96c54] ASoC: Tidy up SOC_DOUBLE_* 
and SOC_SINGLE_* helpers
# good: [426aae69373fb149e5bbe1d5fa18299

[PATCH 03/13] hexagon: move initialization of init_mm.context init to paging_init()

2025-03-11 Thread Mike Rapoport

From: "Mike Rapoport (Microsoft)" 

This will help with pulling out memblock_free_all() to the generic
code and reducing code duplication in arch::mem_init().

Signed-off-by: Mike Rapoport (Microsoft) 
---
 arch/hexagon/mm/init.c | 14 ++
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c
index 3458f39ca2ac..508bb6a8dcc9 100644
--- a/arch/hexagon/mm/init.c
+++ b/arch/hexagon/mm/init.c
@@ -59,14 +59,6 @@ void __init mem_init(void)
 *  To-Do:  someone somewhere should wipe out the bootmem map
 *  after we're done?
 */
-
-   /*
-* This can be moved to some more virtual-memory-specific
-* initialization hook at some point.  Set the init_mm
-* descriptors "context" value to point to the initial
-* kernel segment table's physical address.
-*/
-   init_mm.context.ptbase = __pa(init_mm.pgd);
 }
 
 void sync_icache_dcache(pte_t pte)
@@ -103,6 +95,12 @@ static void __init paging_init(void)
 
free_area_init(max_zone_pfn);  /*  sets up the zonelists and mem_map  */
 
+   /*
+* Set the init_mm descriptors "context" value to point to the
+* initial kernel segment table's physical address.
+*/
+   init_mm.context.ptbase = __pa(init_mm.pgd);
+
/*
 * Start of high memory area.  Will probably need something more
 * fancy if we...  get more fancy.
-- 
2.47.2

[PATCH 5.15 153/620] module: Extend the preempt disabled section in dereference_symbol_descriptor().

2025-03-11 Thread Greg Kroah-Hartman

5.15-stable review patch.  If anyone has any objections, please let me know.

--

From: Sebastian Andrzej Siewior 

[ Upstream commit a145c848d69f9c6f32008d8319edaa133360dd74 ]

dereference_symbol_descriptor() needs to obtain the module pointer
belonging to pointer in order to resolve that pointer.
The returned mod pointer is obtained under RCU-sched/ preempt_disable()
guarantees and needs to be used within this section to ensure that the
module is not removed in the meantime.

Extend the preempt_disable() section to also cover
dereference_module_function_descriptor().

Fixes: 04b8eb7a4ccd9 ("symbol lookup: introduce 
dereference_symbol_descriptor()")
Cc: James E.J. Bottomley 
Cc: Christophe Leroy 
Cc: Helge Deller 
Cc: Madhavan Srinivasan 
Cc: Michael Ellerman 
Cc: Naveen N Rao 
Cc: Nicholas Piggin 
Cc: Sergey Senozhatsky 
Cc: linux-par...@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Reviewed-by: Sergey Senozhatsky 
Acked-by: Peter Zijlstra (Intel) 
Signed-off-by: Sebastian Andrzej Siewior 
Link: https://lore.kernel.org/r/20250108090457.512198-2-bige...@linutronix.de
Signed-off-by: Petr Pavlu 
Signed-off-by: Sasha Levin 
---
 include/linux/kallsyms.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index eae9f423bd648..0f73f69e64035 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -66,10 +66,10 @@ static inline void *dereference_symbol_descriptor(void *ptr)
 
preempt_disable();
mod = __module_address((unsigned long)ptr);
-   preempt_enable();
 
if (mod)
ptr = dereference_module_function_descriptor(mod, ptr);
+   preempt_enable();
 #endif
return ptr;
 }
-- 
2.39.5

[PATCH v8 6/7] powerpc/pseries: Add papr-platform-dump character driver for dump retrieval

2025-03-11 Thread Haren Myneni

The ibm,platform-dump RTAS call, in combination with a writable
mapping of /dev/mem, is issued to collect a platform dump from the
hypervisor and may need multiple calls to get the complete dump. The
current implementation uses the rtas_platform_dump() API provided by
the librtas library to issue these RTAS calls. But /dev/mem access by
user space is prohibited under system lockdown.

The solution should be to restrict access to the RTAS function in user
space and provide kernel interfaces to collect the dump. This patch
adds the papr-platform-dump character driver and exposes standard
interfaces such as open / ioctl / read to user space in ways that
are compatible with lockdown.

PAPR (7.3.3.4.1 ibm,platform-dump) provides a method to obtain
the complete dump:
- Each dump will be identified by ID called dump tag.
- A sequence of RTAS calls has to be issued until the complete
  dump is retrieved. The hypervisor expects the first RTAS call with
  the sequence 0 and the subsequent calls with the sequence
  number returned from the previous calls.
- The hypervisor returns "dump complete" status once the complete
  dump is retrieved. But expects one more RTAS call from the
  partition with the NULL buffer to invalidate dump which means
  the dump will be removed in the hypervisor.
- Sequence of calls are allowed with different dump IDs at the
  same time but not with the same dump ID.

Expose these interfaces to user space with a /dev/papr-platform-dump
character device using the following programming model:

   int devfd = open("/dev/papr-platform-dump", O_RDONLY);
   int fd = ioctl(devfd,PAPR_PLATFORM_DUMP_IOC_CREATE_HANDLE, &dump_id)
- Restrict user space to access with the same dump ID.
  Typically we do not expect user space requests the dump
  again for the same dump ID.
   char *buf = malloc(size);
   length = read(fd, buf, size);
- size should be at least 1K based on PAPR and <= 4K based
  on the RTAS work area size. It will be restricted to the RTAS
  work area size. A 4K work area is used, based on the current
  implementation in the librtas library
- Each read call issues an RTAS call to get the data based on
  the size requirement and returns the bytes returned from the
  hypervisor
- If the previous call returns dump complete status, the
  next read returns 0 like EOF.
   ret = ioctl(PAPR_PLATFORM_DUMP_IOC_INVALIDATE, &dump_id)
- RTAS call with NULL buffer to invalidate the dump.

The read API should use the file descriptor obtained from the ioctl
for a given dump ID so that it gets the dump contents for the
corresponding dump ID. Support for this new ABI has been implemented
in librtas (rtas_platform_dump()) to support system lockdown.

Signed-off-by: Haren Myneni 
---
 .../userspace-api/ioctl/ioctl-number.rst  |   2 +
 .../include/uapi/asm/papr-platform-dump.h |  15 +
 arch/powerpc/platforms/pseries/Makefile   |   1 +
 .../platforms/pseries/papr-platform-dump.c| 411 ++
 4 files changed, 429 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/papr-platform-dump.h
 create mode 100644 arch/powerpc/platforms/pseries/papr-platform-dump.c

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst 
b/Documentation/userspace-api/ioctl/ioctl-number.rst
index f9332b634116..1b661436aa7c 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -365,6 +365,8 @@ Code  Seq#Include File  
 Comments
  

 0xB2  03-05 arch/powerpc/include/uapi/asm/papr-indices.h 
powerpc/pseries indices API
  

+0xB2  06-07 arch/powerpc/include/uapi/asm/papr-platform-dump.h   
powerpc/pseries Platform Dump API
+ 

 0xB3  00 linux/mmc/ioctl.h
 0xB4  00-0F  linux/gpio.h

 0xB5  00-0F  uapi/linux/rpmsg.h  

diff --git a/arch/powerpc/include/uapi/asm/papr-platform-dump.h 
b/arch/powerpc/include/uapi/asm/papr-platform-dump.h
new file mode 100644
index ..a1d89c290dab
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-platform-dump.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_PLATFORM_DUMP_H_
+#define _UAPI_PAPR_PLATFORM_DUMP_H_
+
+#include 
+#include 
+
+/*
+ * ioctl for /dev/papr-platform-dump. Returns a platform-dump handle fd
+ * corresponding to dump tag.
+ */
+#define PAPR_PLATFORM_DUMP_IOC_CREATE_HANDLE _IOW(PAPR_MISCDEV_IOC_ID, 6, 
__u64)
+#define PAPR_PLATFORM_DUMP_IOC_INVALIDATE _IOW(PAPR_MISCDEV_IOC_ID, 7, 
__u64)
+
+#endif /* _UAPI_PAPR_

Re: [linux-next-20250307] Build Failure

2025-03-11 Thread Christophe Leroy





Le 09/03/2025 à 13:38, Venkat Rao Bagalkote a écrit :

Greetings!!,

I see linux-next-20250307 fails to build on IBM Power9 and Power10 servers.


Errors:

In file included from ^[[01m^[[K^[[m^[[K:
^[[01m^[[K./usr/include/cxl/features.h:11:10:^[[m^[[K ^[[01;31m^[[Kfatal 
error: ^[[m^[[Kuuid/uuid.h: No such file or directory

    11 | #include ^[[01;31m^[[K^[[m^[[K
   |  ^[[01;31m^[[K^^[[m^[[K


This is unreadable. Please avoid fancy colors that add escapes to logs. 
You can unset LANG environment var before building in order to get 
pastable stuff.


By the way I don't think it is a kernel issue because nowhere in the 
kernel you have uuid/uuid.h


But can you provide your .config (the actual one, not an old one from 
kernel 6.0.0-rc3 like last time) and tell which version of GCC you are 
using.


Thanks
Christophe

Re: [PATCH 10/13] arch, mm: set high_memory in free_area_init()

2025-03-11 Thread Russell King (Oracle)

On Tue, Mar 11, 2025 at 09:33:29PM +, Mark Brown wrote:
> On Tue, Mar 11, 2025 at 11:06:56PM +0200, Mike Rapoport wrote:
> > On Tue, Mar 11, 2025 at 05:51:06PM +, Mark Brown wrote:
> 
> > > This patch appears to be causing breakage on a number of 32 bit arm
> > > platforms, including qemu's virt-2.11,gic-version=3.  Affected platforms
> > > die on boot with no output, a bisect with qemu points at this commit and
> > > those for physical platforms appear to be converging on the same place.
> 
> > Can you share how this can be reproduced with qemu?
> 
> https://lava.sirena.org.uk/scheduler/job/1184953
> 
> Turns out it's actually producing output on qemu:
> 
> [0.00] Booting Linux on physical CPU 0x0
> [0.00] Linux version 6.14.0-rc6-next-20250311 (tuxmake@tuxmake) 
> (arm-linux-gnueabihf-gcc (Debian 13.3.0-5) 13.3.0, GNU ld (GNU Binutils for 
> Debian) 2.43.1) #1 SMP @1741691801
> [0.00] CPU: ARMv7 Processor [414fc0f0] revision 0 (ARMv7), cr=10c5387d
> [0.00] CPU: div instructions available: patching division code
> [0.00] CPU: PIPT / VIPT nonaliasing data cache, PIPT instruction cache
> [0.00] OF: fdt: Machine model: linux,dummy-virt
> [0.00] random: crng init done
> [0.00] earlycon: pl11 at MMIO 0x0900 (options '')
> [0.00] printk: legacy bootconsole [pl11] enabled
> [0.00] Memory policy: Data cache writealloc
> [0.00] efi: UEFI not found.
> [0.00] cma: Reserved 64 MiB at 0x
> 
> - I'd only been sampling the logs for the physical platforms, none of
> which had shown anything.
> 
> (you dropped me from the CCs BTW!)

That's because your emails contain a "Mail-Followup-To:" header.

Please read:

https://datatracker.ietf.org/doc/html/draft-ietf-drums-mail-followup-to-00.txt

particularly 2.4.

In effect, by including this header, you asked to be dropped.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!

[PATCH] Documentation: ocxl.rst: Update consortium site

2025-03-11 Thread Fritz Koenig

Old site no longer associated with consortium.

Signed-off-by: Fritz Koenig 
---
After mergers the OpenCAPI Consortium does not seem to exist.
The github page is the only seemingly relevant site, but it
has not been updated in 4 years.
---
 Documentation/userspace-api/accelerators/ocxl.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/userspace-api/accelerators/ocxl.rst 
b/Documentation/userspace-api/accelerators/ocxl.rst
index db7570d5e50d..5fc86ead39f4 100644
--- a/Documentation/userspace-api/accelerators/ocxl.rst
+++ b/Documentation/userspace-api/accelerators/ocxl.rst
@@ -3,8 +3,8 @@ OpenCAPI (Open Coherent Accelerator Processor Interface)
 
 
 OpenCAPI is an interface between processors and accelerators. It aims
-at being low-latency and high-bandwidth. The specification is
-developed by the `OpenCAPI Consortium <http://opencapi.org/>`_.
+at being low-latency and high-bandwidth. The specification is developed
+by the `OpenCAPI Consortium <https://opencapi.github.io/oc-accel-doc/>`_.
 
 It allows an accelerator (which could be an FPGA, ASICs, ...) to access
 the host memory coherently, using virtual addresses. An OpenCAPI

---
base-commit: 0b46b049d6eccd947c361018439fcb596e741d7a
change-id: 20250311-dead_site-e96834910663

Best regards,
-- 
Fritz Koenig

Re: [PATCH 10/13] arch, mm: set high_memory in free_area_init()

2025-03-11 Thread Russell King (Oracle)

On Tue, Mar 11, 2025 at 05:51:06PM +, Mark Brown wrote:
> On Thu, Mar 06, 2025 at 08:51:20PM +0200, Mike Rapoport wrote:
> > From: "Mike Rapoport (Microsoft)" 
> > 
> > high_memory defines upper bound on the directly mapped memory.
> > This bound is defined by the beginning of ZONE_HIGHMEM when a system has
> > high memory and by the end of memory otherwise.
> > 
> > All this is known to generic memory management initialization code that
> > can set high_memory while initializing core mm structures.
> > 
> > Remove per-architecture calculation of high_memory and add a generic
> > version to free_area_init().
> 
> This patch appears to be causing breakage on a number of 32 bit arm
> platforms, including qemu's virt-2.11,gic-version=3.  Affected platforms
> die on boot with no output, a bisect with qemu points at this commit and
> those for physical platforms appear to be converging on the same place.

I'm not convinced that the old and the new code are doing the same
thing.

The new code:

+   phys_addr_t highmem = memblock_end_of_DRAM();
+
+#ifdef CONFIG_HIGHMEM
+   unsigned long pfn = arch_zone_lowest_possible_pfn[ZONE_HIGHMEM];
+
+   if (arch_has_descending_max_zone_pfns() || highmem > PFN_PHYS(pfn))
+   highmem = PFN_PHYS(pfn);
+#endif
+
+   high_memory = phys_to_virt(highmem - 1) + 1;

First, when CONFIG_HIGHMEM is disabled, this code assumes that the last
byte of DRAM declared to memblock is the highmem limit. This _could_
overflow phys_to_virt() and lead to an invalid value for high_memory.

Second, arch_zone_lowest_possible_pfn[ZONE_HIGHMEM] is the _start_ of
highmem. This is not what arch code sets high_memory to - because
the start of highmem may not contiguously follow on from lowmem.

In arch/arm/mm/mmu.c, lowmem_limit is computed to be the highest + 1
physical address that lowmem can possibly be, taking into account the
amount of vmalloc memory that is required. This is used to set
high_memory.

We also limit the amount of usable RAM via memblock_set_current_limit()
which memblock_end_of_DRAM() doesn't respect.

I don't think the proposed generic version is suitable for 32-bit arm.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!

Re: [PATCH 10/13] arch, mm: set high_memory in free_area_init()

2025-03-11 Thread Geert Uytterhoeven

Hi Mark,

On Tue, 11 Mar 2025 at 22:33, Mark Brown  wrote:
> On Tue, Mar 11, 2025 at 11:06:56PM +0200, Mike Rapoport wrote:
> > On Tue, Mar 11, 2025 at 05:51:06PM +, Mark Brown wrote:
> > > This patch appears to be causing breakage on a number of 32 bit arm
> > > platforms, including qemu's virt-2.11,gic-version=3.  Affected platforms
> > > die on boot with no output, a bisect with qemu points at this commit and
> > > those for physical platforms appear to be converging on the same place.
>
> > Can you share how this can be reproduced with qemu?
>
> https://lava.sirena.org.uk/scheduler/job/1184953
>
> Turns out it's actually producing output on qemu:
>
> [0.00] Booting Linux on physical CPU 0x0
> [0.00] Linux version 6.14.0-rc6-next-20250311 (tuxmake@tuxmake) 
> (arm-linux-gnueabihf-gcc (Debian 13.3.0-5) 13.3.0, GNU ld (GNU Binutils for 
> Debian) 2.43.1) #1 SMP @1741691801
> [0.00] CPU: ARMv7 Processor [414fc0f0] revision 0 (ARMv7), cr=10c5387d
> [0.00] CPU: div instructions available: patching division code
> [0.00] CPU: PIPT / VIPT nonaliasing data cache, PIPT instruction cache
> [0.00] OF: fdt: Machine model: linux,dummy-virt
> [0.00] random: crng init done
> [0.00] earlycon: pl11 at MMIO 0x0900 (options '')
> [0.00] printk: legacy bootconsole [pl11] enabled
> [0.00] Memory policy: Data cache writealloc
> [0.00] efi: UEFI not found.
> [0.00] cma: Reserved 64 MiB at 0x
>
> - I'd only been sampling the logs for the physical platforms, none of
> which had shown anything.

Hangs that early need "earlycon", which the qemu boot above does have.

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds

Re: [PATCH v8 1/7] powerpc/pseries: Define common functions for RTAS sequence calls

2025-03-11 Thread Mukesh Kumar Chaurasiya

On Tue, Mar 11, 2025 at 03:50:42PM -0700, Haren Myneni wrote:
> The RTAS call can be normal where retrieves the data form the
> hypervisor once or sequence based RTAS call which has to
> issue multiple times until the complete data is obtained. For
> some of these sequence RTAS calls, the OS should not interleave
> calls with different input until the sequence is completed.
> The data is collected for each call and copy to the buffer
> for the entire sequence during ioctl() handle and then expose
> this buffer to the user space with read() handle.
> 
> One such sequence RTAS call is ibm,get-vpd and its support is
> already included in the current code. To add the similar support
> for other sequence based calls, move the common functions in to
> separate file and update papr_rtas_sequence struct with the
> following callbacks so that RTAS call specific code will be
> defined and executed to complete the sequence.
> 
> struct papr_rtas_sequence {
> int error;
> void params;
> void (*begin) (struct papr_rtas_sequence *);
> void (*end) (struct papr_rtas_sequence *);
> const char * (*work) (struct papr_rtas_sequence *, size_t *);
> };
> 
> params: Input parameters used to pass for RTAS call.
> Begin:RTAS call specific function to initialize data
>   including work area allocation.
> End:  RTAS call specific function to free up resources
>   (free work area) after the sequence is completed.
> Work: The actual RTAS call specific function which collects
>   the data from the hypervisor.
> 
> Signed-off-by: Haren Myneni 
> ---
>  arch/powerpc/platforms/pseries/Makefile   |   2 +-
>  .../platforms/pseries/papr-rtas-common.c  | 310 
>  .../platforms/pseries/papr-rtas-common.h  |  61 +++
>  arch/powerpc/platforms/pseries/papr-vpd.c | 351 +++---
>  4 files changed, 416 insertions(+), 308 deletions(-)
>  create mode 100644 arch/powerpc/platforms/pseries/papr-rtas-common.c
>  create mode 100644 arch/powerpc/platforms/pseries/papr-rtas-common.h
> 
> diff --git a/arch/powerpc/platforms/pseries/Makefile 
> b/arch/powerpc/platforms/pseries/Makefile
> index 7bf506f6b8c8..697c216b70dc 100644
> --- a/arch/powerpc/platforms/pseries/Makefile
> +++ b/arch/powerpc/platforms/pseries/Makefile
> @@ -3,7 +3,7 @@ ccflags-$(CONFIG_PPC_PSERIES_DEBUG)   += -DDEBUG
>  
>  obj-y:= lpar.o hvCall.o nvram.o reconfig.o \
>  of_helpers.o rtas-work-area.o papr-sysparm.o \
> -papr-vpd.o \
> +papr-rtas-common.o papr-vpd.o \
>  setup.o iommu.o event_sources.o ras.o \
>  firmware.o power.o dlpar.o mobility.o rng.o \
>  pci.o pci_dlpar.o eeh_pseries.o msi.o \
> diff --git a/arch/powerpc/platforms/pseries/papr-rtas-common.c 
> b/arch/powerpc/platforms/pseries/papr-rtas-common.c
> new file mode 100644
> index ..2d0220209de0
> --- /dev/null
> +++ b/arch/powerpc/platforms/pseries/papr-rtas-common.c
> @@ -0,0 +1,310 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +#define pr_fmt(fmt) "papr-common: " fmt
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include "papr-rtas-common.h"
> +
> +/*
> + * Sequence based RTAS HCALL has to issue multiple times to retrieve
> + * complete data from the hypervisor. For some of these RTAS calls,
> + * the OS should not interleave calls with different input until the
> + * sequence is completed. So data is collected for these calls during
> + * ioctl handle and export to user space with read() handle.
> + * This file provides common functions needed for such sequence based
> + * RTAS calls Ex: ibm,get-vpd and ibm,get-indices.
> + */
> +
> +bool papr_rtas_blob_has_data(const struct papr_rtas_blob *blob)
> +{
> + return blob->data && blob->len;
> +}
> +
> +void papr_rtas_blob_free(const struct papr_rtas_blob *blob)
> +{
> + if (blob) {
> + kvfree(blob->data);
> + kfree(blob);
> + }
> +}
> +
> +/**
> + * papr_rtas_blob_extend() - Append data to a &struct papr_rtas_blob.
> + * @blob: The blob to extend.
> + * @data: The new data to append to @blob.
> + * @len:  The length of @data.
> + *
> + * Context: May sleep.
> + * Return: -ENOMEM on allocation failure, 0 otherwise.
> + */
> +static int papr_rtas_blob_extend(struct papr_rtas_blob *blob,
> + const char *data, size_t len)
> +{
> + const size_t new_len = blob->len + len;
> + const size_t old_len = blob->len;
> + const char *old_ptr = blob->data;
> + char *new_ptr;
> +
> + new_ptr = kvrealloc(old_ptr, new_len, GFP_KERNEL_ACCOUNT);
> + if (!new_ptr)
> + return -ENOMEM;
> +
> + memcpy(&new_ptr[old_len], data, len);
> + blob->data = new_ptr;
> + blob->len = new_len;
> + return 0;
> +}
> +
> +/**
> + * papr_rtas_blob_gene

Re: PowerPC: Observing Kernel softlockup while running ftrace selftest

2025-03-11 Thread Venkat Rao Bagalkote




On 10/03/25 4:25 pm, Hari Bathini wrote:

Venkat, can you confirm if the issue is reproducible
disabling CONFIG_PPC_IRQ_SOFT_MASK_DEBUG  ?


Hari,

This issue is reproducible after disabling CONFIG_PPC_IRQ_SOFT_MASK_DEBUG.

# powerpc Debugging
#
CONFIG_PPC_DISABLE_WERROR=y
CONFIG_PRINT_STACK_DEPTH=64
# CONFIG_HCALL_STATS is not set
CONFIG_PPC_EMULATED_STATS=y
# CONFIG_CODE_PATCHING_SELFTEST is not set
CONFIG_JUMP_LABEL_FEATURE_CHECKS=y
# CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG is not set
CONFIG_FTR_FIXUP_SELFTEST=y
CONFIG_MSI_BITMAP_SELFTEST=y
# CONFIG_GUEST_STATE_BUFFER_TEST is not set
# CONFIG_PPC_IRQ_SOFT_MASK_DEBUG is not set
CONFIG_PPC_RFI_SRR_DEBUG=y
CONFIG_XMON=y
# CONFIG_XMON_DEFAULT is not set
CONFIG_XMON_DISASSEMBLY=y
CONFIG_XMON_DEFAULT_RO_MODE=y
CONFIG_DEBUGGER=y
CONFIG_BOOTX_TEXT=y
# CONFIG_PPC_EARLY_DEBUG is not set
# end of powerpc Debugging

Regards,

Venkat.




On 06/03/25 6:04 pm, Venkat Rao Bagalkote wrote:

Greetings!!!


I am observing soft lockups while running the ftrace selftest on the
linux-next kernel.


Kernel Version: next-20250305

make run_tests
TAP version 13
1..2
# timeout set to 0
# selftests: ftrace: poll
# Error: Polling file is not specified
not ok 1 selftests: ftrace: poll # exit=255
# timeout set to 0
# selftests: ftrace: ftracetest-ktap
# TAP version 13
# 1..155
# ok 1 Basic trace file check
# ok 2 Basic test for tracers
# ok 3 Basic trace clock test
# ok 4 Basic event tracing check
# ok 5 Test tracefs GID mount option
# ok 6 Change the ringbuffer size
# ok 7 Change the ringbuffer sub-buffer size
# ok 8 Snapshot and tracing_cpumask
# ok 9 Snapshot and tracing setting
# ok 10 Test file and directory ownership changes for eventfs
# ok 11 Basic tests on writing to trace_marker
# ok 12 trace_pipe and trace_marker
# not ok 13 Test ftrace direct functions against tracers # UNRESOLVED
# not ok 14 Test ftrace direct functions against kprobes # UNRESOLVED
# ok 15 Generic dynamic event - add/remove probes with BTF arguments 
# SKIP

# ok 16 Generic dynamic event - add/remove eprobe events
# ok 17 Generic dynamic event - Repeating add/remove fprobe events # 
SKIP

# ok 18 Generic dynamic event - add/remove fprobe events # SKIP
# ok 19 Generic dynamic event - add/remove kprobe events
# ok 20 Generic dynamic event - add/remove synthetic events
# ok 21 Generic dynamic event - add/remove tracepoint probe events on 
module # SKIP
# ok 22 Generic dynamic event - add/remove tracepoint probe events # 
SKIP

# ok 23 Generic dynamic event - add/remove/test uprobe events
# ok 24 Generic dynamic event - selective clear (compatibility)
# ok 25 Checking dynamic events limitations
# ok 26 Event probe event parser error log check
# ok 27 Fprobe event VFS type argument # SKIP
# ok 28 Function return probe entry argument access # SKIP
# ok 29 Fprobe event parser error log check # SKIP
# ok 30 Generic dynamic event - generic clear event
# ok 31 Generic dynamic event - check if duplicate events are caught
# ok 32 Tracepoint probe event parser error log check # SKIP
# ok 33 event tracing - enable/disable with event level files
# not ok 34 event tracing - enable/disable with module event # 
UNRESOLVED

# ok 35 event tracing - restricts events based on pid notrace filtering
# ok 36 event tracing - restricts events based on pid
# ok 37 event tracing - enable/disable with subsystem level files
# ok 38 event tracing - enable/disable with top level files
# not ok 39 Test trace_printk from module # UNRESOLVED
# ok 40 event filter function - test event filtering on functions
# ok 41 ftrace - function graph filters with stack tracer
# ok 42 ftrace - function graph filters
# ok 43 ftrace - function graph filters
# ok 44 ftrace - function profiler with function graph tracing
# ok 45 ftrace - function graph print function return value # SKIP
# ok 46 ftrace - function trace with cpumask
# ok 47 ftrace - test for function event triggers
# ok 48 ftrace - function glob filters
# ok 49 ftrace - function pid notrace filters
# ok 50 ftrace - function pid filters
# ok 51 ftrace - stacktrace filter command


Warnings:


[ 2668.008907] watchdog: BUG: soft lockup - CPU#0 stuck for 2265s! 
[swapper/0:0]
[ 2668.008954] Modules linked in: bonding(E) nft_fib_inet(E) 
nft_fib_ipv4(E) nft_fib_ipv6(E) nft_fib(E) nft_reject_inet(E) 
nf_reject_ipv4(E) nf_reject_ipv6(E) nft_reject(E) nft_ct(E) 
nft_chain_nat(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) 
nf_defrag_ipv4(E) ip_set(E) rfkill(E) nf_tables(E) nfnetlink(E) 
mlx5_ib(E) ib_uverbs(E) ib_core(E) pseries_rng(E) vmx_crypto(E) 
dax_pmem(E) drm(E) drm_panel_orientation_quirks(E) xfs(E) sr_mod(E) 
cdrom(E) sd_mod(E) sg(E) lpfc(E) nd_pmem(E) nvmet_fc(E) nd_btt(E) 
ibmvscsi(E) scsi_transport_srp(E) ibmveth(E) nvmet(E) nvme_fc(E) 
mlx5_core(E) nvme_fabrics(E) papr_scm(E) mlxfw(E) nvme_core(E) 
libnvdimm(E) tls(E) psample(E) scsi_transport_fc(E) fuse(E)
[ 2668.010198] CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Kdump: loaded 
Tainted: G    W   EL 6.14.0-rc5-next-20250305 #1

[ 2668.01

Re: [PATCH v8 1/7] powerpc/pseries: Define common functions for RTAS sequence calls

2025-03-11 Thread Haren Myneni

On Wed, 2025-03-12 at 10:55 +0530, Mukesh Kumar Chaurasiya wrote:
> On Tue, Mar 11, 2025 at 03:50:42PM -0700, Haren Myneni wrote:
> > The RTAS call can be normal where retrieves the data form the
> > hypervisor once or sequence based RTAS call which has to
> > issue multiple times until the complete data is obtained. For
> > some of these sequence RTAS calls, the OS should not interleave
> > calls with different input until the sequence is completed.
> > The data is collected for each call and copy to the buffer
> > for the entire sequence during ioctl() handle and then expose
> > this buffer to the user space with read() handle.
> > 
> > One such sequence RTAS call is ibm,get-vpd and its support is
> > already included in the current code. To add the similar support
> > for other sequence based calls, move the common functions in to
> > separate file and update papr_rtas_sequence struct with the
> > following callbacks so that RTAS call specific code will be
> > defined and executed to complete the sequence.
> > 
> > struct papr_rtas_sequence {
> > int error;
> > void params;
> > void (*begin) (struct papr_rtas_sequence *);
> > void (*end) (struct papr_rtas_sequence *);
> > const char * (*work) (struct papr_rtas_sequence *, size_t
> > *);
> > };
> > 
> > params: Input parameters used to pass for RTAS call.
> > Begin:  RTAS call specific function to initialize data
> > including work area allocation.
> > End:RTAS call specific function to free up resources
> > (free work area) after the sequence is completed.
> > Work:   The actual RTAS call specific function which collects
> > the data from the hypervisor.
> > 
> > Signed-off-by: Haren Myneni 
> > ---
> >  arch/powerpc/platforms/pseries/Makefile   |   2 +-
> >  .../platforms/pseries/papr-rtas-common.c  | 310
> > 
> >  .../platforms/pseries/papr-rtas-common.h  |  61 +++
> >  arch/powerpc/platforms/pseries/papr-vpd.c | 351 +++---
> > 
> >  4 files changed, 416 insertions(+), 308 deletions(-)
> >  create mode 100644 arch/powerpc/platforms/pseries/papr-rtas-
> > common.c
> >  create mode 100644 arch/powerpc/platforms/pseries/papr-rtas-
> > common.h
> > 
> > diff --git a/arch/powerpc/platforms/pseries/Makefile
> > b/arch/powerpc/platforms/pseries/Makefile
> > index 7bf506f6b8c8..697c216b70dc 100644
> > --- a/arch/powerpc/platforms/pseries/Makefile
> > +++ b/arch/powerpc/platforms/pseries/Makefile
> > @@ -3,7 +3,7 @@ ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG
> >  
> >  obj-y  := lpar.o hvCall.o nvram.o reconfig.o \
> >of_helpers.o rtas-work-area.o papr-sysparm.o 
> > \
> > -  papr-vpd.o \
> > +  papr-rtas-common.o papr-vpd.o \
> >setup.o iommu.o event_sources.o ras.o \
> >firmware.o power.o dlpar.o mobility.o rng.o
> > \
> >pci.o pci_dlpar.o eeh_pseries.o msi.o \
> > diff --git a/arch/powerpc/platforms/pseries/papr-rtas-common.c
> > b/arch/powerpc/platforms/pseries/papr-rtas-common.c
> > new file mode 100644
> > index ..2d0220209de0
> > --- /dev/null
> > +++ b/arch/powerpc/platforms/pseries/papr-rtas-common.c
> > @@ -0,0 +1,310 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +
> > +#define pr_fmt(fmt) "papr-common: " fmt
> > +
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include "papr-rtas-common.h"
> > +
> > +/*
> > + * Sequence based RTAS HCALL has to issue multiple times to
> > retrieve
> > + * complete data from the hypervisor. For some of these RTAS
> > calls,
> > + * the OS should not interleave calls with different input until
> > the
> > + * sequence is completed. So data is collected for these calls
> > during
> > + * ioctl handle and export to user space with read() handle.
> > + * This file provides common functions needed for such sequence
> > based
> > + * RTAS calls Ex: ibm,get-vpd and ibm,get-indices.
> > + */
> > +
> > +bool papr_rtas_blob_has_data(const struct papr_rtas_blob *blob)
> > +{
> > +   return blob->data && blob->len;
> > +}
> > +
> > +void papr_rtas_blob_free(const struct papr_rtas_blob *blob)
> > +{
> > +   if (blob) {
> > +   kvfree(blob->data);
> > +   kfree(blob);
> > +   }
> > +}
> > +
> > +/**
> > + * papr_rtas_blob_extend() - Append data to a &struct
> > papr_rtas_blob.
> > + * @blob: The blob to extend.
> > + * @data: The new data to append to @blob.
> > + * @len:  The length of @data.
> > + *
> > + * Context: May sleep.
> > + * Return: -ENOMEM on allocation failure, 0 otherwise.
> > + */
> > +static int papr_rtas_blob_extend(struct papr_rtas_blob *blob,
> > +   const char *data, size_t len)
> > +{
> > +   const size_t new_len = blob->len + len;
> > +   const size_t old_len = blob->len;
> > +   const char *old_ptr = blob->

Re: [PATCH 10/13] arch, mm: set high_memory in free_area_init()

2025-03-11 Thread Mike Rapoport

On Tue, Mar 11, 2025 at 09:59:32PM +, Russell King (Oracle) wrote:
> On Tue, Mar 11, 2025 at 05:51:06PM +, Mark Brown wrote:
> > On Thu, Mar 06, 2025 at 08:51:20PM +0200, Mike Rapoport wrote:
> > > From: "Mike Rapoport (Microsoft)" 
> > > 
> > > high_memory defines upper bound on the directly mapped memory.
> > > This bound is defined by the beginning of ZONE_HIGHMEM when a system has
> > > high memory and by the end of memory otherwise.
> > > 
> > > All this is known to generic memory management initialization code that
> > > can set high_memory while initializing core mm structures.
> > > 
> > > Remove per-architecture calculation of high_memory and add a generic
> > > version to free_area_init().
> > 
> > This patch appears to be causing breakage on a number of 32 bit arm
> > platforms, including qemu's virt-2.11,gic-version=3.  Affected platforms
> > die on boot with no output, a bisect with qemu points at this commit and
> > those for physical platforms appear to be converging on the same place.
> 
> I'm not convinced that the old and the new code is doing the same
> thing.
> 
> The new code:
> 
> +   phys_addr_t highmem = memblock_end_of_DRAM();
> +
> +#ifdef CONFIG_HIGHMEM
> +   unsigned long pfn = arch_zone_lowest_possible_pfn[ZONE_HIGHMEM];
> +
> +   if (arch_has_descending_max_zone_pfns() || highmem > PFN_PHYS(pfn))
> +   highmem = PFN_PHYS(pfn);
> +#endif
> +
> +   high_memory = phys_to_virt(highmem - 1) + 1;
> 
> First, when CONFIG_HIGHMEM is disabled, this code assumes that the last
> byte of DRAM declared to memblock is the highmem limit. This _could_
> overflow phys_to_virt() and lead to an invalid value for high_memory.
> 
> Second, arch_zone_lowest_possible_pfn[ZONE_HIGHMEM] is the _start_ of
> highmem. This is not what arch code sets high_memory to - because
> the start of highmem may not contiguously follow on from lowmem.
> 
> In arch/arm/mm/mmu.c, lowmem_limit is computed to be the highest + 1
> physical address that lowmem can possibly be, taking into account the
> amount of vmalloc memory that is required. This is used to set
> high_memory.
> 
> We also limit the amount of usable RAM via memblock_set_current_limit()
> which memblock_end_of_DRAM() doesn't respect.
> 
> I don't think the proposed generic version is suitable for 32-bit arm.

Unless I'm missing something, both memblock.current_limit and start of
ZONE_HIGHMEM are set to arm_lowmem_limit which will be different from
memblock_end_of_DRAM() only for machines with more than nearly 4GiB of RAM
and those will supposedly use HIGHMEM anyway.

But this does not matter anyway because the failures Mark reported happen
because 32-bit arm uses high_memory before mem_init(), and that is what
causes the hangs.

Here's the fix I have, I'll send v2 shortly.

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index e492d58a0386..f02f872ea8a9 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1250,6 +1250,8 @@ void __init adjust_lowmem_bounds(void)
 
arm_lowmem_limit = lowmem_limit;
 
+   high_memory = __va(arm_lowmem_limit - 1) + 1;
+
if (!memblock_limit)
memblock_limit = arm_lowmem_limit;
 
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 65903ed5e80d..1a8f6914ee59 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -146,6 +146,7 @@ void __init adjust_lowmem_bounds(void)
phys_addr_t end;
adjust_lowmem_bounds_mpu();
end = memblock_end_of_DRAM();
+   high_memory = __va(end - 1) + 1;
memblock_set_current_limit(end);
 }
 
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 545e11f1a3ba..0aef4bef93c4 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1765,14 +1765,20 @@ static bool arch_has_descending_max_zone_pfns(void)
 
 static void set_high_memory(void)
 {
+   unsigned long pfn = arch_zone_lowest_possible_pfn[ZONE_HIGHMEM];
phys_addr_t highmem = memblock_end_of_DRAM();
 
-#ifdef CONFIG_HIGHMEM
-   unsigned long pfn = arch_zone_lowest_possible_pfn[ZONE_HIGHMEM];
+   /*
+* Some architectures (e.g. ARM) set high_memory very early and
+* use it in arch setup code.
+* If an architecture already set high_memory don't overwrite it
+*/
+   if (high_memory)
+   return;
 
-   if (arch_has_descending_max_zone_pfns() || highmem > PFN_PHYS(pfn))
+   if (IS_ENABLED(CONFIG_HIGHMEM) &&
+   (arch_has_descending_max_zone_pfns() || highmem > PFN_PHYS(pfn)))
highmem = PFN_PHYS(pfn);
-#endif
 
high_memory = phys_to_virt(highmem - 1) + 1;
 }

-- 
Sincerely yours,
Mike.

Re: [PATCH 10/13] arch, mm: set high_memory in free_area_init()

2025-03-11 Thread Russell King (Oracle)

On Tue, Mar 11, 2025 at 09:33:29PM +, Mark Brown wrote:
> [0.00] Booting Linux on physical CPU 0x0
> [0.00] Linux version 6.14.0-rc6-next-20250311 (tuxmake@tuxmake) 
> (arm-linux-gnueabihf-gcc (Debian 13.3.0-5) 13.3.0, GNU ld (GNU Binutils for 
> Debian) 2.43.1) #1 SMP @1741691801
> [0.00] CPU: ARMv7 Processor [414fc0f0] revision 0 (ARMv7), cr=10c5387d
> [0.00] CPU: div instructions available: patching division code
> [0.00] CPU: PIPT / VIPT nonaliasing data cache, PIPT instruction cache
> [0.00] OF: fdt: Machine model: linux,dummy-virt
> [0.00] random: crng init done
> [0.00] earlycon: pl11 at MMIO 0x0900 (options '')
> [0.00] printk: legacy bootconsole [pl11] enabled
> [0.00] Memory policy: Data cache writealloc
> [0.00] efi: UEFI not found.
> [0.00] cma: Reserved 64 MiB at 0x

If that CMA address is correct, then it's wrong. virt machines start
DRAM at 0x4000. This is a small memory VM:

[0.00] Zone ranges:
[0.00]   Normal   [mem 0x4000-0x45ff]
[0.00]   HighMem  empty

and this is a larger memory VM:

[0.00] Zone ranges:
[0.00]   Normal   [mem 0x4000-0x6fff]
[0.00]   HighMem  [mem 0x7000-0x7fff]

Neither have CMA enabled (it's not necessary for a VM).

On a real platform where CMA and highmem is enabled, then:

[0.00] cma: Reserved 16 MiB at 0x4f00 on node -1
[0.00] Zone ranges:
[0.00]   Normal   [mem 0x1000-0x3fff]
[0.00]   HighMem  [mem 0x4000-0x4fff]

So that "cma:" line you are seeing is indicating that something is very
very wrong - it should definitely not be zero.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!

[PATCH v8 1/7] powerpc/pseries: Define common functions for RTAS sequence calls

2025-03-11 Thread Haren Myneni

An RTAS call can be either a normal call, which retrieves the data
from the hypervisor once, or a sequence-based call, which has to be
issued multiple times until the complete data is obtained. For
some of these sequence RTAS calls, the OS should not interleave
calls with different input until the sequence is completed.
The data collected by each call is copied to a buffer covering
the entire sequence in the ioctl() handler, and this buffer is
then exposed to user space through the read() handler.

One such sequence RTAS call is ibm,get-vpd, and its support is
already included in the current code. To add similar support
for other sequence-based calls, move the common functions into a
separate file and update the papr_rtas_sequence struct with the
following callbacks so that RTAS-call-specific code can be
defined and executed to complete the sequence.

struct papr_rtas_sequence {
int error;
void *params;
void (*begin) (struct papr_rtas_sequence *);
void (*end) (struct papr_rtas_sequence *);
const char * (*work) (struct papr_rtas_sequence *, size_t *);
};

params: Input parameters passed to the RTAS call.
begin:  RTAS call specific function to initialize data,
        including work area allocation.
end:    RTAS call specific function to free up resources
        (free work area) after the sequence is completed.
work:   The actual RTAS call specific function which collects
        the data from the hypervisor.

Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/pseries/Makefile   |   2 +-
 .../platforms/pseries/papr-rtas-common.c  | 310 
 .../platforms/pseries/papr-rtas-common.h  |  61 +++
 arch/powerpc/platforms/pseries/papr-vpd.c | 351 +++---
 4 files changed, 416 insertions(+), 308 deletions(-)
 create mode 100644 arch/powerpc/platforms/pseries/papr-rtas-common.c
 create mode 100644 arch/powerpc/platforms/pseries/papr-rtas-common.h

diff --git a/arch/powerpc/platforms/pseries/Makefile 
b/arch/powerpc/platforms/pseries/Makefile
index 7bf506f6b8c8..697c216b70dc 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -3,7 +3,7 @@ ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG
 
 obj-y  := lpar.o hvCall.o nvram.o reconfig.o \
   of_helpers.o rtas-work-area.o papr-sysparm.o \
-  papr-vpd.o \
+  papr-rtas-common.o papr-vpd.o \
   setup.o iommu.o event_sources.o ras.o \
   firmware.o power.o dlpar.o mobility.o rng.o \
   pci.o pci_dlpar.o eeh_pseries.o msi.o \
diff --git a/arch/powerpc/platforms/pseries/papr-rtas-common.c 
b/arch/powerpc/platforms/pseries/papr-rtas-common.c
new file mode 100644
index ..2d0220209de0
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr-rtas-common.c
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define pr_fmt(fmt) "papr-common: " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "papr-rtas-common.h"
+
+/*
+ * Sequence based RTAS HCALL has to issue multiple times to retrieve
+ * complete data from the hypervisor. For some of these RTAS calls,
+ * the OS should not interleave calls with different input until the
+ * sequence is completed. So data is collected for these calls during
+ * ioctl handle and export to user space with read() handle.
+ * This file provides common functions needed for such sequence based
+ * RTAS calls Ex: ibm,get-vpd and ibm,get-indices.
+ */
+
+bool papr_rtas_blob_has_data(const struct papr_rtas_blob *blob)
+{
+   return blob->data && blob->len;
+}
+
+void papr_rtas_blob_free(const struct papr_rtas_blob *blob)
+{
+   if (blob) {
+   kvfree(blob->data);
+   kfree(blob);
+   }
+}
+
+/**
+ * papr_rtas_blob_extend() - Append data to a &struct papr_rtas_blob.
+ * @blob: The blob to extend.
+ * @data: The new data to append to @blob.
+ * @len:  The length of @data.
+ *
+ * Context: May sleep.
+ * Return: -ENOMEM on allocation failure, 0 otherwise.
+ */
+static int papr_rtas_blob_extend(struct papr_rtas_blob *blob,
+   const char *data, size_t len)
+{
+   const size_t new_len = blob->len + len;
+   const size_t old_len = blob->len;
+   const char *old_ptr = blob->data;
+   char *new_ptr;
+
+   new_ptr = kvrealloc(old_ptr, new_len, GFP_KERNEL_ACCOUNT);
+   if (!new_ptr)
+   return -ENOMEM;
+
+   memcpy(&new_ptr[old_len], data, len);
+   blob->data = new_ptr;
+   blob->len = new_len;
+   return 0;
+}
+
+/**
+ * papr_rtas_blob_generate() - Construct a new &struct papr_rtas_blob.
+ * @seq: work function of the caller that is called to obtain
+ *   data with the caller RTAS call.
+ *
+ * The @work callback is invoked until it returns NULL. @seq is
+ * passed to @work in its first argument on each c

[PATCH v8 3/7] powerpc/pseries: Add papr-indices char driver for ibm,get-indices

2025-03-11 Thread Haren Myneni

The RTAS call ibm,get-indices is used to obtain indices and
location codes for a specified indicator or sensor token. The
current implementation uses the rtas_get_indices() API provided by
the librtas library, which allocates an RMO buffer and issues this
RTAS call from user space. But writable mapping of /dev/mem by
user space is prohibited under system lockdown.

To overcome the restricted access in user space, the kernel
provides interfaces to collect indices data from the hypervisor.
This patch adds the papr-indices character driver and exposes standard
interfaces such as open / ioctl / read to user space in ways that
are compatible with lockdown.

PAPR (2.13 7.3.17 ibm,get-indices RTAS Call) describes the
following steps to retrieve all indices data:
- User input parameters to the RTAS call: sensor or indicator,
  and indice type
- ibm,get-indices is a sequence RTAS call, which means it has to be
  issued multiple times to get the entire list of indicators or sensors
  of a particular type. The hypervisor expects the first RTAS call
  with the sequence 1 and the subsequent calls with the sequence
  number returned from the previous calls.
- The OS may not interleave calls to ibm,get-indices for different
  indicator or sensor types. This means other RTAS calls with a
  different type should not be issued while the previous type's
  sequence is in progress. So collect the entire list of indices, copy
  it to a buffer (BLOB) during ioctl(), and expose this buffer to user
  space with the file descriptor.
- The hypervisor fills the work area with a specific format but
  does not return the number of bytes written to the buffer.
  Instead of parsing the data for each call to determine the data
  length, copy the work area size (RTAS_GET_INDICES_BUF_SIZE) to
  the buffer. Return work-area size of data to the user space for
  each read() call.

Expose these interfaces to user space with a /dev/papr-indices
character device using the following programming model:

 int devfd = open("/dev/papr-indices", O_RDONLY);
 int fd = ioctl(devfd, PAPR_INDICES_IOC_GET,
struct papr_indices_io_block)
  - Collect all indices data for the specified token to the buffer
 char *buf = malloc(RTAS_GET_INDICES_BUF_SIZE);
 length = read(fd, buf,  RTAS_GET_INDICES_BUF_SIZE)
  - RTAS_GET_INDICES_BUF_SIZE of data is returned to the user
space.
  - The user space retrieves the indices and their location codes
from the buffer
  - Should issue multiple read() calls until reaches the end of
BLOB buffer.

The read() should use the file descriptor obtained from ioctl to
get the data that is exposed to file descriptor. Implemented
support in librtas (rtas_get_indices()) for this new ABI for
system lockdown.

Signed-off-by: Haren Myneni 
---
 arch/powerpc/include/asm/rtas.h   |   1 +
 arch/powerpc/kernel/rtas.c|   2 +-
 arch/powerpc/platforms/pseries/Makefile   |   2 +-
 arch/powerpc/platforms/pseries/papr-indices.c | 301 ++
 4 files changed, 304 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/platforms/pseries/papr-indices.c

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 04406162fc5a..7dc527a5aaac 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -515,6 +515,7 @@ extern char rtas_data_buf[RTAS_DATA_BUF_SIZE];
 extern unsigned long rtas_rmo_buf;
 
 extern struct mutex rtas_ibm_get_vpd_lock;
+extern struct mutex rtas_ibm_get_indices_lock;
 
 #define GLOBAL_INTERRUPT_QUEUE 9005
 
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index d31c9799cab2..76c634b92cb2 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -93,11 +93,11 @@ struct rtas_function {
  */
 static DEFINE_MUTEX(rtas_ibm_activate_firmware_lock);
 static DEFINE_MUTEX(rtas_ibm_get_dynamic_sensor_state_lock);
-static DEFINE_MUTEX(rtas_ibm_get_indices_lock);
 static DEFINE_MUTEX(rtas_ibm_lpar_perftools_lock);
 static DEFINE_MUTEX(rtas_ibm_physical_attestation_lock);
 static DEFINE_MUTEX(rtas_ibm_set_dynamic_indicator_lock);
 DEFINE_MUTEX(rtas_ibm_get_vpd_lock);
+DEFINE_MUTEX(rtas_ibm_get_indices_lock);
 
 static struct rtas_function rtas_function_table[] __ro_after_init = {
[RTAS_FNIDX__CHECK_EXCEPTION] = {
diff --git a/arch/powerpc/platforms/pseries/Makefile 
b/arch/powerpc/platforms/pseries/Makefile
index 697c216b70dc..e1db61877bb9 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -3,7 +3,7 @@ ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG
 
 obj-y  := lpar.o hvCall.o nvram.o reconfig.o \
   of_helpers.o rtas-work-area.o papr-sysparm.o \
-  papr-rtas-common.o papr-vpd.o \
+  papr-rtas-common.o papr-vpd.o papr-indices.o \
   setup.o iommu.o event_sources.o ras.o \
   firmware.o power.o dlpar.o mobilit

[PATCH v8 4/7] powerpc/pseries: Add ibm,set-dynamic-indicator RTAS call support

2025-03-11 Thread Haren Myneni

The RTAS call ibm,set-dynamic-indicator is used to set the new
indicator state identified by a location code. The current
implementation uses the rtas_set_dynamic_indicator() API provided by
the librtas library, which allocates an RMO buffer and issues this
RTAS call from user space. But /dev/mem access by user space
is prohibited under system lockdown.

This patch adds a new ioctl, PAPR_DYNAMIC_INDICATOR_IOC_SET,
to the papr-indices character driver and exposes this
interface to user space in a way that is compatible with
lockdown.

Refer PAPR 7.3.18 ibm,set-dynamic-indicator for more
information on this RTAS call.
-  User input parameters to the RTAS call: location code
   string, indicator token and new state

Expose these interfaces to user space with a /dev/papr-indices
character device using the following programming model:
 int fd = open("/dev/papr-indices", O_RDWR);
 int ret = ioctl(fd, PAPR_DYNAMIC_INDICATOR_IOC_SET,
struct papr_indices_io_block)
  - The user space passes input parameters in papr_indices_io_block
struct

Signed-off-by: Haren Myneni 
---
 arch/powerpc/include/asm/rtas.h   |   1 +
 arch/powerpc/kernel/rtas.c|   2 +-
 arch/powerpc/platforms/pseries/papr-indices.c | 120 ++
 3 files changed, 122 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 7dc527a5aaac..2da52f59e4c6 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -516,6 +516,7 @@ extern unsigned long rtas_rmo_buf;
 
 extern struct mutex rtas_ibm_get_vpd_lock;
 extern struct mutex rtas_ibm_get_indices_lock;
+extern struct mutex rtas_ibm_set_dynamic_indicator_lock;
 
 #define GLOBAL_INTERRUPT_QUEUE 9005
 
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 76c634b92cb2..88fa416730af 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -95,9 +95,9 @@ static DEFINE_MUTEX(rtas_ibm_activate_firmware_lock);
 static DEFINE_MUTEX(rtas_ibm_get_dynamic_sensor_state_lock);
 static DEFINE_MUTEX(rtas_ibm_lpar_perftools_lock);
 static DEFINE_MUTEX(rtas_ibm_physical_attestation_lock);
-static DEFINE_MUTEX(rtas_ibm_set_dynamic_indicator_lock);
 DEFINE_MUTEX(rtas_ibm_get_vpd_lock);
 DEFINE_MUTEX(rtas_ibm_get_indices_lock);
+DEFINE_MUTEX(rtas_ibm_set_dynamic_indicator_lock);
 
 static struct rtas_function rtas_function_table[] __ro_after_init = {
[RTAS_FNIDX__CHECK_EXCEPTION] = {
diff --git a/arch/powerpc/platforms/pseries/papr-indices.c 
b/arch/powerpc/platforms/pseries/papr-indices.c
index a0b3c9a61c5c..c46b728d5f47 100644
--- a/arch/powerpc/platforms/pseries/papr-indices.c
+++ b/arch/powerpc/platforms/pseries/papr-indices.c
@@ -20,6 +20,13 @@
 #include 
 #include "papr-rtas-common.h"
 
+/*
+ * Function-specific return values for ibm,set-dynamic-indicator and
+ * ibm,get-dynamic-sensor-state RTAS calls.
+ * PAPR+ v2.13 7.3.18 and 7.3.19.
+ */
+#define RTAS_IBM_DYNAMIC_INDICE_NO_INDICATOR   -3
+
 /**
  * struct rtas_get_indices_params - Parameters (in and out) for
  *  ibm,get-indices.
@@ -260,6 +267,110 @@ static long papr_indices_create_handle(struct 
papr_indices_io_block __user *ubuf
return fd;
 }
 
+/*
+ * Create work area with the input parameters. This function is used
+ * for both ibm,set-dynamic-indicator and ibm,get-dynamic-sensor-state
+ * RTAS Calls.
+ */
+static struct rtas_work_area *
+papr_dynamic_indice_buf_from_user(struct papr_indices_io_block __user *ubuf,
+   struct papr_indices_io_block *kbuf)
+{
+   struct rtas_work_area *work_area;
+   u32 length;
+   __be32 len_be;
+
+   if (copy_from_user(kbuf, ubuf, sizeof(*kbuf)))
+   return ERR_PTR(-EFAULT);
+
+
+   if (!string_is_terminated(kbuf->dynamic_param.location_code_str,
+   ARRAY_SIZE(kbuf->dynamic_param.location_code_str)))
+   return ERR_PTR(-EINVAL);
+
+   /*
+* The input data in the work area should be as follows:
+* - 32-bit integer length of the location code string,
+*   including NULL.
+* - Location code string, NULL terminated, identifying the
+*   token (sensor or indicator).
+* PAPR 2.13 - R1–7.3.18–5 ibm,set-dynamic-indicator
+*   - R1–7.3.19–5 ibm,get-dynamic-sensor-state
+*/
+   /*
+* Length that user space passed should also include NULL
+* terminator.
+*/
+   length = strlen(kbuf->dynamic_param.location_code_str) + 1;
+   if (length > LOC_CODE_SIZE)
+   return ERR_PTR(-EINVAL);
+
+   len_be = cpu_to_be32(length);
+
+   work_area = rtas_work_area_alloc(LOC_CODE_SIZE + sizeof(u32));
+   memcpy(rtas_work_area_raw_buf(work_area), &len_be, sizeof(u32));
+   memcpy((rtas_work_area_raw_buf(work_area) + sizeof(u32)),
+   &kbuf->dynamic_param.location_cod

[PATCH v8 0/7] Add character devices for indices, platform-dump and physical-attestation RTAS

2025-03-11 Thread Haren Myneni

Several APIs such as rtas_get_indices(), rtas_get_dynamic_sensor(),
rtas_set_dynamic_indicator(), rtas_platform_dump() and
rtas_physical_attestation()  provided by librtas library are
implemented in user space using rtas syscall in combination with
writable mappings of /dev/mem. But this implementation is not
compatible with system lockdown which prohibits /dev/mem access.
The current kernel already provides char based driver interfaces
for several RTAS calls such as VPD and system parameters to
support lockdown feature.

This patch series adds new char based drivers: /dev/papr-indices
for the ibm,get-indices, ibm,get-dynamic-sensor-state and
ibm,set-dynamic-indicator RTAS calls, /dev/papr-platform-dump
for ibm,platform-dump, and /dev/papr-physical-attestation
for ibm,physical-attestation. They provide open/ioctl/read
interfaces to user space similar to those already available for
VPD and system parameters.

I have made changes to librtas library to use the new kernel
interfaces if the corresponding device entry is available.

This patch series has the following patches:
powerpc/pseries: Define common functions for RTAS sequence calls
- For some of sequence based RTAS calls, the OS should not start
  another sequence with different input until the previous sequence
  is completed. So the sequence should be completed during ioctl()
  and expose the entire buffer during read(). ibm,get-indices is
  sequence based RTAS function similar to ibm,get-vpd and we already
  have the corresponding implementation for VPD driver. So update
  papr_rtas_sequence struct for RTAS call specific functions and move
  the top level sequence functions in to a separate file.

powerpc/pseries: Define papr_indices_io_block for papr-indices ioctls
- /dev/papr-indices driver supports ibm,get-indices,
  ibm,get-dynamic-sensor-state and ibm,set-dynamic-indicator RTAS Calls.
  papr-indices.h introduces 3 different ioctls for these RTAS calls and
  the corresponding ioctl input buffer.

powerpc/pseries: Add papr-indices char driver for ibm,get-indices
- Introduce /dev/papr-indices char based driver and add support for
  get-indices RTAS function

powerpc/pseries: Add ibm,set-dynamic-indicator RTAS call support
- Update /dev/papr-indices for set-dynamic-indicator RTAS function

powerpc/pseries: Add ibm,get-dynamic-sensor-state RTAS call support
-  Update /dev/papr-indices for  get-dynamic-sensor-state RTAS function

powerpc/pseries: Add papr-platform-dump character driver for dump
   retrieval
- Introduce /dev/papr-platform-dump char driver and adds support for
  ibm,platform-dump. Received suggestions from the previous post as a
  separate patch - Updated the patch with invalidating the dump using
  a separate ioctl.

powerpc/pseries: Add a char driver for papr-physical-attestation RTAS
- Introduce /dev/papr-physical-attestation char driver to provide
  kernel interface for ibm,physical-attestation RTAS function.

Changelog:
v8:
- Fixed build warnings for the proper function parameter descriptions
  (vpd_sequence_begin(), few papr_rtas_*() functions, and etc) as
  reported by kernel test robot 

v7:
- Pass the proper next value to the subsequent RTAS calls for the
  get-indices sequence RTAS.
  (Vasireddy Sathvika found this bug).

v6:
- Define the proper command ID for PAPR_PHY_ATTEST_IOC_HANDLE ioctl
- Update ioctls description in ioctl-number.rst.

v5:
- Return with -EINPROGRESS in papr_platform_dump_invalidate_ioctl()
  if the complete dump is not read (Suggested by Michal Suchánek).

v4:
- Include patch "Add char driver for papr-physical-attestation RTAS"
  in this series. ibm,physical-attestation is sequence based RTAS
  call and the implementation is also similar to ibm,get-vpd and
  ibm,get-indices.

v3:
- put_unused_fd() only after get_unused_fd() successful for the failure
  case later ("Add papr-platform-dump character driver for dump
  retrieval" patch).

v2:
- Added unlock rtas_ibm_set_dynamic_indicator_lock and
  rtas_ibm_get_dynamic_sensor_state_lock mutex for failure cases
  as reported by Dan Carpenter
- Fixed build warnings for the proper function parameter descriptions
  as reported by kernel test robot 

Haren Myneni (7):
  powerpc/pseries: Define common functions for RTAS sequence calls
  powerpc/pseries: Define papr_indices_io_block for papr-indices ioctls
  powerpc/pseries: Add papr-indices char driver for ibm,get-indices
  powerpc/pseries: Add ibm,set-dynamic-indicator RTAS call support
  powerpc/pseries: Add ibm,get-dynamic-sensor-state RTAS call support
  powerpc/pseries: Add papr-platform-dump character driver for dump
retrieval
  powerpc/pseries: Add a char driver for physical-attestation RTAS

 .../userspace-api/ioctl/ioctl-number.rst  |   6 +
 arch/powerpc/include/asm/rtas.h   |   4 +
 arch/powerpc/include/uapi/asm/papr-indices.h  |  41 ++
 .../uapi/asm/papr-physical-attestation.h  |  31 ++
 .../include/uapi/asm/papr-platform-dump.h |  15 +
 arch/powerpc/kernel/rtas.c|   8

[PATCH v8 2/7] powerpc/pseries: Define papr_indices_io_block for papr-indices ioctls

2025-03-11 Thread Haren Myneni

To issue ibm,get-indices, ibm,set-dynamic-indicator and
ibm,get-dynamic-sensor-state in the user space, the RMO buffer is
allocated for the work area which is restricted under system
lockdown. So instead of user space execution, the kernel will
provide /dev/papr-indices interface to execute these RTAS calls.

User space fills in the papr_indices_io_block struct
depending on the specific HCALL and passes it to the following ioctls:

PAPR_INDICES_IOC_GET:   Use for ibm,get-indices. Returns a
get-indices handle fd to read data.
PAPR_DYNAMIC_SENSOR_IOC_GET:Use for  ibm,get-dynamic-sensor-state.
Updates the sensor state in
papr_indices_io_block.dynamic_param.state

PAPR_DYNAMIC_INDICATOR_IOC_SET: Use for ibm,set-dynamic-indicator.
Sets the new state for the input
indicator.

Signed-off-by: Haren Myneni 
---
 .../userspace-api/ioctl/ioctl-number.rst  |  2 +
 arch/powerpc/include/uapi/asm/papr-indices.h  | 41 +++
 2 files changed, 43 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/papr-indices.h

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst 
b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 6d1465315df3..f9332b634116 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -363,6 +363,8 @@ Code  Seq#Include File  
 Comments
  

 0xB2  01-02  arch/powerpc/include/uapi/asm/papr-sysparm.h
powerpc/pseries system parameter API
  

+0xB2  03-05 arch/powerpc/include/uapi/asm/papr-indices.h 
powerpc/pseries indices API
+ 

 0xB3  00 linux/mmc/ioctl.h
 0xB4  00-0F  linux/gpio.h

 0xB5  00-0F  uapi/linux/rpmsg.h  

diff --git a/arch/powerpc/include/uapi/asm/papr-indices.h 
b/arch/powerpc/include/uapi/asm/papr-indices.h
new file mode 100644
index ..c2999d89d52a
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-indices.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_INDICES_H_
+#define _UAPI_PAPR_INDICES_H_
+
+#include 
+#include 
+#include 
+
+#define LOC_CODE_SIZE  80
+#define RTAS_GET_INDICES_BUF_SIZE  SZ_4K
+
+struct papr_indices_io_block {
+   union {
+   struct {
+   __u8 is_sensor; /* 0 for indicator and 1 for sensor */
+   __u32 indice_type;
+   } indices;
+   struct {
+   __u32 token; /* Sensor or indicator token */
+   __u32 state; /* get / set state */
+   /*
+* PAPR+ 12.3.2.4 Converged Location Code Rules - Length
+* Restrictions. 79 characters plus null.
+*/
+   char location_code_str[LOC_CODE_SIZE]; /* location code 
*/
+   } dynamic_param;
+   };
+};
+
+/*
+ * ioctls for /dev/papr-indices.
+ * PAPR_INDICES_IOC_GET: Returns a get-indices handle fd to read data
+ * PAPR_DYNAMIC_SENSOR_IOC_GET: Gets the state of the input sensor
+ * PAPR_DYNAMIC_INDICATOR_IOC_SET: Sets the new state for the input indicator
+ */
+#define PAPR_INDICES_IOC_GET   _IOW(PAPR_MISCDEV_IOC_ID, 3, struct 
papr_indices_io_block)
+#define PAPR_DYNAMIC_SENSOR_IOC_GET_IOWR(PAPR_MISCDEV_IOC_ID, 4, struct 
papr_indices_io_block)
+#define PAPR_DYNAMIC_INDICATOR_IOC_SET _IOW(PAPR_MISCDEV_IOC_ID, 5, struct 
papr_indices_io_block)
+
+
+#endif /* _UAPI_PAPR_INDICES_H_ */
-- 
2.43.5

[PATCH v8 7/7] powerpc/pseries: Add a char driver for physical-attestation RTAS

2025-03-11 Thread Haren Myneni

The RTAS call ibm,physical-attestation is used to retrieve
information about the trusted boot state of the firmware and
hypervisor on the system, and also Trusted Platform Modules (TPM)
data if the system is TCG 2.0 compliant.

This RTAS interface expects the caller to define different command
structs, such as RetrieveTPMLog, RetrievePlatformCertificat, etc.,
in a work area with a maximum size of 4K bytes, and the response
buffer will be returned in the same work area.

The current implementation of this RTAS function is in user
space, but allocation of the work area is restricted under system
lockdown. So this patch implements this RTAS function in the kernel
and exposes it to user space with open/ioctl/read interfaces.

PAPR (2.13+ 21.3 ibm,physical-attestation) defines RTAS function:
- Pass the command struct to obtain the response buffer for the
  specific command.
- This RTAS function is sequence RTAS call and has to issue RTAS
  call multiple times to get the complete response buffer (max 64K).
  The hypervisor expects the first RTAS call with the sequence 1 and
  the subsequent calls with the sequence number returned from the
  previous calls.

Expose these interfaces to user space with a
/dev/papr-physical-attestation character device using the following
programming model:

 int devfd = open("/dev/papr-physical-attestation");
 int fd = ioctl(devfd, PAPR_PHY_ATTEST_IOC_HANDLE,
  struct papr_phy_attest_io_block);
 - The user space defines the command struct and requests the
   response for any command.
 - Obtain the complete response buffer and returned the buffer as
   blob to the command specific FD.
 size = read(fd, buf, len);
 - Can retrieve the response buffer once or multiple times until the
   end of BLOB buffer.

Implemented this new kernel ABI support in librtas library for
system lockdown

Signed-off-by: Haren Myneni 
---
 .../userspace-api/ioctl/ioctl-number.rst  |   2 +
 arch/powerpc/include/asm/rtas.h   |   1 +
 .../uapi/asm/papr-physical-attestation.h  |  31 ++
 arch/powerpc/kernel/rtas.c|   2 +-
 arch/powerpc/platforms/pseries/Makefile   |   2 +-
 .../platforms/pseries/papr-phy-attest.c   | 288 ++
 6 files changed, 324 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/include/uapi/asm/papr-physical-attestation.h
 create mode 100644 arch/powerpc/platforms/pseries/papr-phy-attest.c

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst 
b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 1b661436aa7c..504d059970d4 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -367,6 +367,8 @@ Code  Seq#Include File  
 Comments
  

 0xB2  06-07 arch/powerpc/include/uapi/asm/papr-platform-dump.h   
powerpc/pseries Platform Dump API
  

+0xB2  08  arch/powerpc/include/uapi/asm/papr-physical-attestation.h  
powerpc/pseries Physical Attestation API
+ 

 0xB3  00 linux/mmc/ioctl.h
 0xB4  00-0F  linux/gpio.h

 0xB5  00-0F  uapi/linux/rpmsg.h  

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index fcd822f0e1d7..75fa0293c508 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -518,6 +518,7 @@ extern struct mutex rtas_ibm_get_vpd_lock;
 extern struct mutex rtas_ibm_get_indices_lock;
 extern struct mutex rtas_ibm_set_dynamic_indicator_lock;
 extern struct mutex rtas_ibm_get_dynamic_sensor_state_lock;
+extern struct mutex rtas_ibm_physical_attestation_lock;
 
 #define GLOBAL_INTERRUPT_QUEUE 9005
 
diff --git a/arch/powerpc/include/uapi/asm/papr-physical-attestation.h 
b/arch/powerpc/include/uapi/asm/papr-physical-attestation.h
new file mode 100644
index ..ea746837bb9a
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-physical-attestation.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_PHYSICAL_ATTESTATION_H_
+#define _UAPI_PAPR_PHYSICAL_ATTESTATION_H_
+
+#include 
+#include 
+#include 
+
+#define PAPR_PHYATTEST_MAX_INPUT 4084 /* Max 4K buffer: 4K-12 */
+
+/*
+ * Defined in PAPR 2.13+ 21.6 Attestation Command Structures.
+ * User space pass this struct and the max size should be 4K.
+ */
+struct papr_phy_attest_io_block {
+   __u8 version;
+   __u8 command;
+   __u8 TCG_major_ver;
+   __u8 TCG_minor_ver;
+   __be32 length;
+   __be32 correlator;
+   __u8 payload[PAPR_PHYATTEST_MAX_INPUT];
+};
+

[PATCH v8 5/7] powerpc/pseries: Add ibm,get-dynamic-sensor-state RTAS call support

2025-03-11 Thread Haren Myneni

The RTAS call ibm,get-dynamic-sensor-state is used to get the
sensor state identified by the location code and the sensor
token. The librtas library provides an API
rtas_get_dynamic_sensor() which uses /dev/mem access for work
area allocation but is restricted under system lockdown.

This patch provides an interface with new ioctl
 PAPR_DYNAMIC_SENSOR_IOC_GET to the papr-indices character
driver which executes this HCALL and copies the sensor state
in the user specified ioctl buffer.

Refer PAPR 7.3.19 ibm,get-dynamic-sensor-state for more
information on this RTAS call.
- User input parameters to the RTAS call: location code string
  and the sensor token

Expose these interfaces to user space with a /dev/papr-indices
character device using the following programming model:
 int fd = open("/dev/papr-indices", O_RDWR);
 int ret = ioctl(fd, PAPR_DYNAMIC_SENSOR_IOC_GET,
struct papr_indices_io_block)
  - The user space specifies input parameters in
papr_indices_io_block struct
  - Returned state for the specified sensor is copied to
papr_indices_io_block.dynamic_param.state

Signed-off-by: Haren Myneni 
---
 arch/powerpc/include/asm/rtas.h   |  1 +
 arch/powerpc/kernel/rtas.c|  2 +-
 arch/powerpc/platforms/pseries/papr-indices.c | 67 +++
 3 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 2da52f59e4c6..fcd822f0e1d7 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -517,6 +517,7 @@ extern unsigned long rtas_rmo_buf;
 extern struct mutex rtas_ibm_get_vpd_lock;
 extern struct mutex rtas_ibm_get_indices_lock;
 extern struct mutex rtas_ibm_set_dynamic_indicator_lock;
+extern struct mutex rtas_ibm_get_dynamic_sensor_state_lock;
 
 #define GLOBAL_INTERRUPT_QUEUE 9005
 
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 88fa416730af..a4848e7f248e 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -92,12 +92,12 @@ struct rtas_function {
  * Per-function locks for sequence-based RTAS functions.
  */
 static DEFINE_MUTEX(rtas_ibm_activate_firmware_lock);
-static DEFINE_MUTEX(rtas_ibm_get_dynamic_sensor_state_lock);
 static DEFINE_MUTEX(rtas_ibm_lpar_perftools_lock);
 static DEFINE_MUTEX(rtas_ibm_physical_attestation_lock);
 DEFINE_MUTEX(rtas_ibm_get_vpd_lock);
 DEFINE_MUTEX(rtas_ibm_get_indices_lock);
 DEFINE_MUTEX(rtas_ibm_set_dynamic_indicator_lock);
+DEFINE_MUTEX(rtas_ibm_get_dynamic_sensor_state_lock);
 
 static struct rtas_function rtas_function_table[] __ro_after_init = {
[RTAS_FNIDX__CHECK_EXCEPTION] = {
diff --git a/arch/powerpc/platforms/pseries/papr-indices.c 
b/arch/powerpc/platforms/pseries/papr-indices.c
index c46b728d5f47..3c7545591c45 100644
--- a/arch/powerpc/platforms/pseries/papr-indices.c
+++ b/arch/powerpc/platforms/pseries/papr-indices.c
@@ -371,6 +371,67 @@ static long papr_dynamic_indicator_ioc_set(struct 
papr_indices_io_block __user *
return ret;
 }
 
+/**
+ * papr_dynamic_sensor_ioc_get - ibm,get-dynamic-sensor-state RTAS Call
+ * PAPR 2.13 7.3.19
+ *
+ * @ubuf: Input parameters to the RTAS call, such as the sensor token.
+ *        The returned state is copied into this user space buffer.
+ *
+ *
+ * Returns success or -errno.
+ */
+
+static long papr_dynamic_sensor_ioc_get(struct papr_indices_io_block __user 
*ubuf)
+{
+   struct papr_indices_io_block kbuf;
+   struct rtas_work_area *work_area;
+   s32 fwrc, token, ret;
+   u32 rets;
+
+   token = rtas_function_token(RTAS_FN_IBM_GET_DYNAMIC_SENSOR_STATE);
+   if (token == RTAS_UNKNOWN_SERVICE)
+   return -ENOENT;
+
+   mutex_lock(&rtas_ibm_get_dynamic_sensor_state_lock);
+   work_area = papr_dynamic_indice_buf_from_user(ubuf, &kbuf);
+   if (IS_ERR(work_area)) {
+   ret = PTR_ERR(work_area);
+   goto out;
+   }
+
+   do {
+   fwrc = rtas_call(token, 2, 2, &rets,
+   kbuf.dynamic_param.token,
+   rtas_work_area_phys(work_area));
+   } while (rtas_busy_delay(fwrc));
+
+   rtas_work_area_free(work_area);
+
+   switch (fwrc) {
+   case RTAS_SUCCESS:
+   if (put_user(rets, &ubuf->dynamic_param.state))
+   ret = -EFAULT;
+   else
+   ret = 0;
+   break;
+   case RTAS_IBM_DYNAMIC_INDICE_NO_INDICATOR:  /* No such indicator */
+   ret = -EOPNOTSUPP;
+   break;
+   default:
+   pr_err("unexpected ibm,get-dynamic-sensor result %d\n",
+   fwrc);
+   fallthrough;
+   case RTAS_HARDWARE_ERROR:   /* Hardware/platform error */
+   ret = -EIO;
+   break;
+   }
+
+out:
+   mutex_unlock(&rtas_ibm_get_dynamic_sensor_state_lock);
+   return ret;
+}
+
 /*
  * Top-level ioctl han

Re: [PATCH 10/13] arch, mm: set high_memory in free_area_init()

2025-03-11 Thread Mike Rapoport

Hi Mark,

On Tue, Mar 11, 2025 at 05:51:06PM +0000, Mark Brown wrote:
> On Thu, Mar 06, 2025 at 08:51:20PM +0200, Mike Rapoport wrote:
> > From: "Mike Rapoport (Microsoft)" 
> > 
> > high_memory defines upper bound on the directly mapped memory.
> > This bound is defined by the beginning of ZONE_HIGHMEM when a system has
> > high memory and by the end of memory otherwise.
> > 
> > All this is known to generic memory management initialization code that
> > can set high_memory while initializing core mm structures.
> > 
> > Remove per-architecture calculation of high_memory and add a generic
> > version to free_area_init().
> 
> This patch appears to be causing breakage on a number of 32 bit arm
> platforms, including qemu's virt-2.11,gic-version=3.  Affected platforms
> die on boot with no output, a bisect with qemu points at this commit and
> those for physical platforms appear to be converging on the same place.

Can you share how this can be reproduced with qemu?

-- 
Sincerely yours,
Mike.

Re: [PATCH 10/13] arch, mm: set high_memory in free_area_init()

2025-03-11 Thread Mark Brown

On Tue, Mar 11, 2025 at 11:06:56PM +0200, Mike Rapoport wrote:
> On Tue, Mar 11, 2025 at 05:51:06PM +0000, Mark Brown wrote:

> > This patch appears to be causing breakage on a number of 32 bit arm
> > platforms, including qemu's virt-2.11,gic-version=3.  Affected platforms
> > die on boot with no output, a bisect with qemu points at this commit and
> > those for physical platforms appear to be converging on the same place.

> Can you share how this can be reproduced with qemu?

https://lava.sirena.org.uk/scheduler/job/1184953

Turns out it's actually producing output on qemu:

[0.00] Booting Linux on physical CPU 0x0
[0.00] Linux version 6.14.0-rc6-next-20250311 (tuxmake@tuxmake) 
(arm-linux-gnueabihf-gcc (Debian 13.3.0-5) 13.3.0, GNU ld (GNU Binutils for 
Debian) 2.43.1) #1 SMP @1741691801
[0.00] CPU: ARMv7 Processor [414fc0f0] revision 0 (ARMv7), cr=10c5387d
[0.00] CPU: div instructions available: patching division code
[0.00] CPU: PIPT / VIPT nonaliasing data cache, PIPT instruction cache
[0.00] OF: fdt: Machine model: linux,dummy-virt
[0.00] random: crng init done
[0.00] earlycon: pl11 at MMIO 0x0900 (options '')
[0.00] printk: legacy bootconsole [pl11] enabled
[0.00] Memory policy: Data cache writealloc
[0.00] efi: UEFI not found.
[0.00] cma: Reserved 64 MiB at 0x

- I'd only been sampling the logs for the physical platforms, none of
which had shown anything.

(you dropped me from the CCs BTW!)


signature.asc
Description: PGP signature

Re: [PATCH] Documentation: ocxl.rst: Update consortium site

2025-03-11 Thread Andrew Donnellan

On Tue, 2025-03-11 at 11:24 -0700, Fritz Koenig wrote:
> Old site no longer associated with consortium.

Thanks for catching this!

> 
> Signed-off-by: Fritz Koenig 
> ---
> After mergers the OpenCAPI Consortium does not seem to exist.
> The github page is the only seemingly relevant site, but it
> has not been updated in 4 years.
> ---
>  Documentation/userspace-api/accelerators/ocxl.rst | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/userspace-api/accelerators/ocxl.rst
> b/Documentation/userspace-api/accelerators/ocxl.rst
> index db7570d5e50d..5fc86ead39f4 100644
> --- a/Documentation/userspace-api/accelerators/ocxl.rst
> +++ b/Documentation/userspace-api/accelerators/ocxl.rst
> @@ -3,8 +3,8 @@ OpenCAPI (Open Coherent Accelerator Processor
> Interface)
>  
>  
>  OpenCAPI is an interface between processors and accelerators. It
> aims
> -at being low-latency and high-bandwidth. The specification is
> -developed by the `OpenCAPI Consortium `_.
> +at being low-latency and high-bandwidth. The specification is
> developed
> +by the `OpenCAPI Consortium
> `_.

This link is to the OC-Accel framework documentation, which isn't the
core set of OpenCAPI specifications. Those specs are now hosted by CXL,
post-merger.

I think I'd update this to say:

The specification was developed by the OpenCAPI Consortium, and is now
available from the `Compute Express Link Consortium
_.

Thanks,
Andrew

-- 
Andrew DonnellanOzLabs, ADL Canberra
a...@linux.ibm.com   IBM Australia Limited

Re: [PATCH v4] dt-bindings: dma: Convert fsl,elo*-dma to YAML

2025-03-11 Thread Rob Herring (Arm)



On Sat, 08 Mar 2025 19:33:39 +0100, J. Neuschäfer wrote:
> The devicetree bindings for Freescale DMA engines have so far existed as
> a text file. This patch converts them to YAML, and specifies all the
> compatible strings currently in use in arch/powerpc/boot/dts.
> 
> Signed-off-by: J. Neuschäfer 
> ---
> I considered referencing dma-controller.yaml, but that requires
> the #dma-cells property (via dma-common.yaml), and I'm not sure which
> value it should have, if any. Therefore I did not reference
> dma-controller.yaml.
> 
> V4:
> - switch DMA controller node name (in examples) back to dma@ because the
>   dma-controller.yaml binding is not used.
> 
> V3:
> - Link: 
> https://lore.kernel.org/r/20250226-ppcyaml-dma-v3-1-79ce31335...@posteo.net
> - split out as a single patch
> - restructure "description" definitions to use "items:" as much as possible
> - remove useless description of interrupts in fsl,elo3-dma
> - rename DMA controller nodes to dma-controller@...
> - use IRQ_TYPE_* constants in examples
> - define unit address format for DMA channel nodes
> - drop interrupts-parent properties from examples
> 
> V2:
> - part of series [PATCH v2 00/12] YAML conversion of several 
> Freescale/PowerPC DT bindings
>   Link: 
> https://lore.kernel.org/lkml/20250207-ppcyaml-v2-5-8137b0c42...@posteo.net/
> - remove unnecessary multiline markers
> - fix additionalProperties to always be false
> - add description/maxItems to interrupts
> - add missing #address-cells/#size-cells properties
> - convert "Note on DMA channel compatible properties" to YAML by listing
>   fsl,ssi-dma-channel as a valid compatible value
> - fix property ordering in examples: compatible and reg come first
> - add missing newlines in examples
> - trim subject line (remove "bindings")
> ---
>  .../devicetree/bindings/dma/fsl,elo-dma.yaml   | 137 ++
>  .../devicetree/bindings/dma/fsl,elo3-dma.yaml  | 125 +
>  .../devicetree/bindings/dma/fsl,eloplus-dma.yaml   | 132 +
>  .../devicetree/bindings/powerpc/fsl/dma.txt| 204 
> -
>  4 files changed, 394 insertions(+), 204 deletions(-)
> 

Reviewed-by: Rob Herring (Arm)

[PATCH v3 2/2] book3s64/radix : Align section vmemmap start address to PAGE_SIZE

2025-03-11 Thread Donet Tom

A vmemmap altmap is a device-provided region used to provide
backing storage for struct pages. For each namespace, the altmap
should belong to that same namespace. If the namespaces are
created unaligned, there is a chance that the section vmemmap
start address could also be unaligned. If the section vmemmap
start address is unaligned, the altmap page allocated from the
current namespace might be used by the previous namespace also.
During the free operation, since the altmap is shared between two
namespaces, the previous namespace may detect that the page does
not belong to its altmap and incorrectly assume that the page is a
normal page. It then attempts to free the normal page, which leads
to a kernel crash.

Kernel attempted to read user page (18) - exploit attempt? (uid: 0)
BUG: Kernel NULL pointer dereference on read at 0x0018
Faulting instruction address: 0xc0530c7c
Oops: Kernel access of bad area, sig: 11 [#1]
LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
CPU: 32 PID: 2104 Comm: ndctl Kdump: loaded Tainted: GW
NIP:  c0530c7c LR: c0530e00 CTR: 7ffe
REGS: c00015e57040 TRAP: 0300   Tainted: GW
MSR:  8280b033   CR: 84482404
CFAR: c0530dfc DAR: 0018 DSISR: 4000 IRQMASK: 0
GPR00: c0530e00 c00015e572e0 c2c5cb00 c00c000101008040
GPR04:  0007 0001 001f
GPR08: 0005  0018 2000
GPR12: c01d2fb0 c060de6b0080  c060dbf90020
GPR16: c00c000101008000 0001  c00125b20f00
GPR20: 0001   c00c000101007fff
GPR24: 0001   
GPR28: 04040201 0001  c00c000101008040
NIP [c0530c7c] get_pfnblock_flags_mask+0x7c/0xd0
LR [c0530e00] free_unref_page_prepare+0x130/0x4f0
Call Trace:
free_unref_page+0x50/0x1e0
free_reserved_page+0x40/0x68
free_vmemmap_pages+0x98/0xe0
remove_pte_table+0x164/0x1e8
remove_pmd_table+0x204/0x2c8
remove_pud_table+0x1c4/0x288
remove_pagetable+0x1c8/0x310
vmemmap_free+0x24/0x50
section_deactivate+0x28c/0x2a0
__remove_pages+0x84/0x110
arch_remove_memory+0x38/0x60
memunmap_pages+0x18c/0x3d0
devm_action_release+0x30/0x50
release_nodes+0x68/0x140
devres_release_group+0x100/0x190
dax_pmem_compat_release+0x44/0x80 [dax_pmem_compat]
device_for_each_child+0x8c/0x100
dax_pmem_compat_remove+0x2c/0x50 [dax_pmem_compat]
nvdimm_bus_remove+0x78/0x140 [libnvdimm]
device_remove+0x70/0xd0

Another issue is that if there is no altmap, a PMD-sized vmemmap
page will be allocated from RAM, regardless of the alignment of
the section start address. If the section start address is not
aligned to the PMD size, a VM_BUG_ON will be triggered when
setting the PMD-sized page to page table.

In this patch, we are aligning the section vmemmap start address
to PAGE_SIZE. After alignment, the start address will not be
part of the current namespace, and a normal page will be allocated
for the vmemmap mapping of the current section. For the remaining
sections, altmaps will be allocated. During the free operation,
the normal page will be correctly freed.

In the same way, a PMD_SIZE vmemmap page will be allocated only if
the section start address is PMD_SIZE-aligned; otherwise, it will
fall back to a PAGE-sized vmemmap allocation.

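Without this patch
==================
NS1 start               NS2 start
 _________________________________________________________
|         NS1               |            NS2              |
 ---------------------------------------------------------
| Altmap| Altmap | .....|Altmap| Altmap | ...........
|  NS1  |  NS1   |      | NS2  |  NS2   |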
In the above scenario, NS1 and NS2 are two namespaces. The vmemmap
for NS1 comes from Altmap NS1, which belongs to NS1, and the
vmemmap for NS2 comes from Altmap NS2, which belongs to NS2.

The vmemmap start for NS2 is not aligned, so Altmap NS2 is shared
by both NS1 and NS2. During the free operation in NS1, Altmap NS2
is not part of NS1's altmap, causing it to attempt to free an
invalid page.

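With this patch
===============
NS1 start               NS2 start
 _________________________________________________________
|         NS1               |            NS2              |
 ---------------------------------------------------------
| Altmap| Altmap | .....| Normal | Altmap | Altmap |.......
|  NS1  |  NS1   |      |  Page  |  NS2   |  NS2   |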

If the vmemmap start for NS2 is not aligned then we are allocating
a normal page. NS1 and NS2 vmemmap will be freed correctly.

Fixes: 368a0590d954 ("powerpc/book3s64/vmemmap: switch radix to use a different 
vmemmap handling function")
Co-developed-by: Ritesh Harjani (IBM) 
Signed-off-by: Ritesh Harjani (IBM) 
Signed-off-by: Donet Tom 
---

Optimizations suggested by Aneesh and Ritesh regarding alig

Re: [PATCH 2/2] powerpc, bpf: Inline bpf_get_smp_processor_id()

2025-03-11 Thread Christophe Leroy





Le 11/03/2025 à 17:09, Saket Kumar Bhaskar a écrit :

[Vous ne recevez pas souvent de courriers de sk...@linux.ibm.com. Découvrez 
pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]

Inline the calls to bpf_get_smp_processor_id() in the powerpc bpf jit.

powerpc saves the Logical processor number (paca_index) in paca.

Here is how the powerpc JITed assembly changes after this commit:

Before:

cpu = bpf_get_smp_processor_id();

addis 12, 2, -517
addi 12, 12, -29456
mtctr 12
bctrl
mr  8, 3

After:

cpu = bpf_get_smp_processor_id();

lhz 8, 8(13)

To evaluate the performance improvements introduced by this change,
the benchmark described in [1] was employed.

+---+---+---+--+
|  Name |  Before   |After  |   % change   |
|---+---+---+--|
| glob-arr-inc  | 41.580 ± 0.034M/s | 54.137 ± 0.019M/s |   + 30.20%   |
| arr-inc   | 39.592 ± 0.055M/s | 54.000 ± 0.026M/s |   + 36.39%   |
| hash-inc  | 25.873 ± 0.012M/s | 26.334 ± 0.058M/s |   + 1.78%|
+---+---+---+--+



Nice improvement.

I see that bpf_get_current_task() could be inlined as well, on PPC32 it 
is in r2, on PPC64 it is in paca.



[1] 
https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2Fanakryiko%2Flinux%2Fcommit%2F8dec900975ef&data=05%7C02%7Cchristophe.leroy%40csgroup.eu%7C1d1f40ce41344cf1ecf508dd60b73ae0%7C8b87af7d86474dc78df45f69a2011bb5%7C0%7C0%7C638773062267813839%7CUnknown%7CTWFpbGZsb3d8eyJFbXB0eU1hcGkiOnRydWUsIlYiOiIwLjAuMDAwMCIsIlAiOiJXaW4zMiIsIkFOIjoiTWFpbCIsIldUIjoyfQ%3D%3D%7C0%7C%7C%7C&sdata=T%2BG206FHtW7hhFT1%2BXxRwN7pc%2BRzu8SiMlZ5njIlhB8%3D&reserved=0

Signed-off-by: Saket Kumar Bhaskar 
---
  arch/powerpc/net/bpf_jit_comp.c   | 10 ++
  arch/powerpc/net/bpf_jit_comp64.c |  5 +
  2 files changed, 15 insertions(+)

diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 3d4bd45a9a22..4b79b2d95469 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -445,6 +445,16 @@ bool bpf_jit_supports_percpu_insn(void)
 return true;
  }

+bool bpf_jit_inlines_helper_call(s32 imm)
+{
+   switch (imm) {
+   case BPF_FUNC_get_smp_processor_id:
+   return true;
+   default:
+   return false;
+   }
+}


What about PPC32 ?



+
  void *arch_alloc_bpf_trampoline(unsigned int size)
  {
 return bpf_prog_pack_alloc(size, bpf_jit_fill_ill_insns);
diff --git a/arch/powerpc/net/bpf_jit_comp64.c 
b/arch/powerpc/net/bpf_jit_comp64.c
index 06f06770ceea..a8de12c026da 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -1087,6 +1087,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, 
u32 *fimage, struct code
 case BPF_JMP | BPF_CALL:
 ctx->seen |= SEEN_FUNC;

+   if (insn[i].src_reg == 0 && imm == 
BPF_FUNC_get_smp_processor_id) {


Please use BPF_REG_0 instead of just 0.


+   EMIT(PPC_RAW_LHZ(bpf_to_ppc(BPF_REG_0), _R13, 
offsetof(struct paca_struct, paca_index)));


Can just use 'src_reg' instead of 'bpf_to_ppc(BPF_REG_0)'


+   break;
+   }
+
 ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
 &func_addr, 
&func_addr_fixed);
 if (ret < 0)
--
2.43.5

71 matches

Mail list logo