date:20240123

Re: [PATCH net-next v2] vsock/test: add '--peer-port' input argument

2024-01-23 Thread Stefano Garzarella


On Tue, Jan 23, 2024 at 10:27:50AM +0300, Arseniy Krasnov wrote:

Implement port for given CID as input argument instead of using
hardcoded value '1234'. This allows to run different test instances
on a single CID. Port argument is not required parameter and if it is
not set, then default value will be '1234' - thus we preserve previous
behaviour.

Signed-off-by: Arseniy Krasnov 
---
Changelog:
v1 -> v2:
 * Reword usage message.
 * Add commas after last field in 'opts' declaration.
 * 'RFC' -> 'net-next'.


Thanks for the changes, LGTM!

Reviewed-by: Stefano Garzarella 



tools/testing/vsock/util.c|  17 +++-
tools/testing/vsock/util.h|   4 +
tools/testing/vsock/vsock_diag_test.c |  21 +++--
tools/testing/vsock/vsock_test.c  | 102 +-
tools/testing/vsock/vsock_test_zerocopy.c |  12 +--
tools/testing/vsock/vsock_uring_test.c|  17 +++-
6 files changed, 115 insertions(+), 58 deletions(-)

diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index ae2b33c21c45..554b290fefdc 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -33,8 +33,7 @@ void init_signals(void)
signal(SIGPIPE, SIG_IGN);
}

-/* Parse a CID in string representation */
-unsigned int parse_cid(const char *str)
+static unsigned int parse_uint(const char *str, const char *err_str)
{
char *endptr = NULL;
unsigned long n;
@@ -42,12 +41,24 @@ unsigned int parse_cid(const char *str)
errno = 0;
n = strtoul(str, &endptr, 10);
if (errno || *endptr != '\0') {
-   fprintf(stderr, "malformed CID \"%s\"\n", str);
+   fprintf(stderr, "malformed %s \"%s\"\n", err_str, str);
exit(EXIT_FAILURE);
}
return n;
}

+/* Parse a CID in string representation */
+unsigned int parse_cid(const char *str)
+{
+   return parse_uint(str, "CID");
+}
+
+/* Parse a port in string representation */
+unsigned int parse_port(const char *str)
+{
+   return parse_uint(str, "port");
+}
+
/* Wait for the remote to close the connection */
void vsock_wait_remote_close(int fd)
{
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index 03c88d0cb861..e95e62485959 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -12,10 +12,13 @@ enum test_mode {
TEST_MODE_SERVER
};

+#define DEFAULT_PEER_PORT  1234
+
/* Test runner options */
struct test_opts {
enum test_mode mode;
unsigned int peer_cid;
+   unsigned int peer_port;
};

/* A test case definition.  Test functions must print failures to stderr and
@@ -35,6 +38,7 @@ struct test_case {

void init_signals(void);
unsigned int parse_cid(const char *str);
+unsigned int parse_port(const char *str);
int vsock_stream_connect(unsigned int cid, unsigned int port);
int vsock_bind_connect(unsigned int cid, unsigned int port,
   unsigned int bind_port, int type);
diff --git a/tools/testing/vsock/vsock_diag_test.c 
b/tools/testing/vsock/vsock_diag_test.c
index fa927ad16f8a..9d61b1f1c4c3 100644
--- a/tools/testing/vsock/vsock_diag_test.c
+++ b/tools/testing/vsock/vsock_diag_test.c
@@ -342,7 +342,7 @@ static void test_listen_socket_server(const struct 
test_opts *opts)
} addr = {
.svm = {
.svm_family = AF_VSOCK,
-   .svm_port = 1234,
+   .svm_port = opts->peer_port,
.svm_cid = VMADDR_CID_ANY,
},
};
@@ -378,7 +378,7 @@ static void test_connect_client(const struct test_opts 
*opts)
LIST_HEAD(sockets);
struct vsock_stat *st;

-   fd = vsock_stream_connect(opts->peer_cid, 1234);
+   fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
@@ -403,7 +403,7 @@ static void test_connect_server(const struct test_opts 
*opts)
LIST_HEAD(sockets);
int client_fd;

-   client_fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+   client_fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
if (client_fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
@@ -461,6 +461,11 @@ static const struct option longopts[] = {
.has_arg = required_argument,
.val = 'p',
},
+   {
+   .name = "peer-port",
+   .has_arg = required_argument,
+   .val = 'q',
+   },
{
.name = "list",
.has_arg = no_argument,
@@ -481,7 +486,7 @@ static const struct option longopts[] = {

static void usage(void)
{
-   fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=] 
--control-port= --mode=client|server --peer-cid= [--list] [--skip=]\n"
+   fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=] --control-port= 
--mode=client

[PATCH v7 0/4] remoteproc: qcom: Introduce DSP support for SM8650

2024-01-23 Thread Neil Armstrong

Add the bindings and driver changes for DSP support on the
SM8650 platform in order to enable the aDSP, cDSP and MPSS
subsystems to boot.

Compared to SM8550, where SM8650 uses the same dual firmware files,
(dtb file and main firmware) the memory zones requirement has changed:
- cDSP: now requires 2 memory zones to be configured as shared
  between the cDSP and the HLOS subsystem
- MPSS: In addition to the memory zone required for the SM8550
  MPSS, two more are required to be configured for MPSS
  usage only.

In order to handle this and avoid code duplication, the region_assign_*
code patch has been made more generic and is able handle multiple
DSP-only memory zones (for MPSS) or DSP-HLOS shared memory zones (cDSP)
in the same region_assign functions.

Dependencies: None

For convenience, a regularly refreshed linux-next based git tree containing
all the SM8650 related work is available at:
https://git.codelinaro.org/neil.armstrong/linux/-/tree/topic/sm8650/upstream/integ

Signed-off-by: Neil Armstrong 
---
Changes in v7:
- Rebased on v6.8-rc1
- Add another memory region for MPSS, in bindings, code and DT
  - Kepts Krzysztof's review on bindings after agreement on irc
  - Kept drivers patches reviews because it's only a miminal change (value 2 -> 
3)
- Link to v6: 
https://lore.kernel.org/r/20231218-topic-sm8650-upstream-remoteproc-v6-0-3d16b37f1...@linaro.org

Changes in v6:
- Rebased on next-20231218, last patch did not apply anymore
- Link to v5: 
https://lore.kernel.org/r/20231212-topic-sm8650-upstream-remoteproc-v5-0-e749a1a48...@linaro.org

Changes in v5:
- Rename _perms to _owners per Konrad suggestion
- Link to v4: 
https://lore.kernel.org/r/20231208-topic-sm8650-upstream-remoteproc-v4-0-a96c3e5f0...@linaro.org

Changes in v4:
- Collected review from Mukesh Ojha
- Fixed adsp_unassign_memory_region() as suggested by Mukesh Ojha
- Link to v3: 
https://lore.kernel.org/r/20231106-topic-sm8650-upstream-remoteproc-v3-0-dbd4cabae...@linaro.org

Changes in v3:
- Collected bindings review tags
- Small fixes suggested by Mukesh Ojha
- Link to v2: 
https://lore.kernel.org/r/20231030-topic-sm8650-upstream-remoteproc-v2-0-609ee572e...@linaro.org

Changes in v2:
- Fixed sm8650 entries in allOf:if:then to match Krzysztof's comments
- Collected reviewed-by on patch 3
- Link to v1: 
https://lore.kernel.org/r/20231025-topic-sm8650-upstream-remoteproc-v1-0-a8d20e4ce...@linaro.org

---
Neil Armstrong (4):
  dt-bindings: remoteproc: qcom,sm8550-pas: document the SM8650 PAS
  remoteproc: qcom: pas: make region assign more generic
  remoteproc: qcom: pas: Add SM8650 remoteproc support
  arm64: dts: qcom: sm8650: add missing qlink_logging reserved memory for 
mpss

 .../bindings/remoteproc/qcom,sm8550-pas.yaml   |  45 ++-
 arch/arm64/boot/dts/qcom/sm8650.dtsi   |   8 +-
 drivers/remoteproc/qcom_q6v5_pas.c | 150 -
 3 files changed, 167 insertions(+), 36 deletions(-)
---
base-commit: 6613476e225e090cc9aad49be7fa504e290dd33d
change-id: 20231016-topic-sm8650-upstream-remoteproc-66a87eeb6fee

Best regards,
-- 
Neil Armstrong

[PATCH v7 1/4] dt-bindings: remoteproc: qcom,sm8550-pas: document the SM8650 PAS

2024-01-23 Thread Neil Armstrong

Document the DSP Peripheral Authentication Service on the SM8650 Platform.

Reviewed-by: Krzysztof Kozlowski 
Signed-off-by: Neil Armstrong 
---
 .../bindings/remoteproc/qcom,sm8550-pas.yaml   | 45 +-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,sm8550-pas.yaml 
b/Documentation/devicetree/bindings/remoteproc/qcom,sm8550-pas.yaml
index 58120829fb06..5f63b6b9a8f5 100644
--- a/Documentation/devicetree/bindings/remoteproc/qcom,sm8550-pas.yaml
+++ b/Documentation/devicetree/bindings/remoteproc/qcom,sm8550-pas.yaml
@@ -19,6 +19,9 @@ properties:
   - qcom,sm8550-adsp-pas
   - qcom,sm8550-cdsp-pas
   - qcom,sm8550-mpss-pas
+  - qcom,sm8650-adsp-pas
+  - qcom,sm8650-cdsp-pas
+  - qcom,sm8650-mpss-pas
 
   reg:
 maxItems: 1
@@ -49,6 +52,8 @@ properties:
   - description: Memory region for main Firmware authentication
   - description: Memory region for Devicetree Firmware authentication
   - description: DSM Memory region
+  - description: DSM Memory region 2
+  - description: Memory region for Qlink Logging
 
 required:
   - compatible
@@ -63,6 +68,7 @@ allOf:
   enum:
 - qcom,sm8550-adsp-pas
 - qcom,sm8550-cdsp-pas
+- qcom,sm8650-adsp-pas
 then:
   properties:
 interrupts:
@@ -71,7 +77,26 @@ allOf:
   maxItems: 5
 memory-region:
   maxItems: 2
-else:
+  - if:
+  properties:
+compatible:
+  enum:
+- qcom,sm8650-cdsp-pas
+then:
+  properties:
+interrupts:
+  maxItems: 5
+interrupt-names:
+  maxItems: 5
+memory-region:
+  minItems: 3
+  maxItems: 3
+  - if:
+  properties:
+compatible:
+  enum:
+- qcom,sm8550-mpss-pas
+then:
   properties:
 interrupts:
   minItems: 6
@@ -79,12 +104,28 @@ allOf:
   minItems: 6
 memory-region:
   minItems: 3
+  maxItems: 3
+  - if:
+  properties:
+compatible:
+  enum:
+- qcom,sm8650-mpss-pas
+then:
+  properties:
+interrupts:
+  minItems: 6
+interrupt-names:
+  minItems: 6
+memory-region:
+  minItems: 5
+  maxItems: 5
 
   - if:
   properties:
 compatible:
   enum:
 - qcom,sm8550-adsp-pas
+- qcom,sm8650-adsp-pas
 then:
   properties:
 power-domains:
@@ -101,6 +142,7 @@ allOf:
 compatible:
   enum:
 - qcom,sm8550-mpss-pas
+- qcom,sm8650-mpss-pas
 then:
   properties:
 power-domains:
@@ -116,6 +158,7 @@ allOf:
 compatible:
   enum:
 - qcom,sm8550-cdsp-pas
+- qcom,sm8650-cdsp-pas
 then:
   properties:
 power-domains:

-- 
2.34.1

[PATCH v7 2/4] remoteproc: qcom: pas: make region assign more generic

2024-01-23 Thread Neil Armstrong

The current memory region assign only supports a single
memory region.

But new platforms introduces more regions to make the
memory requirements more flexible for various use cases.
Those new platforms also shares the memory region between the
DSP and HLOS.

To handle this, make the region assign more generic in order
to support more than a single memory region and also permit
setting the regions permissions as shared.

Reviewed-by: Mukesh Ojha 
Signed-off-by: Neil Armstrong 
---
 drivers/remoteproc/qcom_q6v5_pas.c | 100 -
 1 file changed, 66 insertions(+), 34 deletions(-)

diff --git a/drivers/remoteproc/qcom_q6v5_pas.c 
b/drivers/remoteproc/qcom_q6v5_pas.c
index a9dd58608052..09e8ad9f08c4 100644
--- a/drivers/remoteproc/qcom_q6v5_pas.c
+++ b/drivers/remoteproc/qcom_q6v5_pas.c
@@ -33,6 +33,8 @@
 
 #define ADSP_DECRYPT_SHUTDOWN_DELAY_MS 100
 
+#define MAX_ASSIGN_COUNT 3
+
 struct adsp_data {
int crash_reason_smem;
const char *firmware_name;
@@ -51,6 +53,9 @@ struct adsp_data {
int ssctl_id;
 
int region_assign_idx;
+   int region_assign_count;
+   bool region_assign_shared;
+   int region_assign_vmid;
 };
 
 struct qcom_adsp {
@@ -87,15 +92,18 @@ struct qcom_adsp {
phys_addr_t dtb_mem_phys;
phys_addr_t mem_reloc;
phys_addr_t dtb_mem_reloc;
-   phys_addr_t region_assign_phys;
+   phys_addr_t region_assign_phys[MAX_ASSIGN_COUNT];
void *mem_region;
void *dtb_mem_region;
size_t mem_size;
size_t dtb_mem_size;
-   size_t region_assign_size;
+   size_t region_assign_size[MAX_ASSIGN_COUNT];
 
int region_assign_idx;
-   u64 region_assign_perms;
+   int region_assign_count;
+   bool region_assign_shared;
+   int region_assign_vmid;
+   u64 region_assign_owners[MAX_ASSIGN_COUNT];
 
struct qcom_rproc_glink glink_subdev;
struct qcom_rproc_subdev smd_subdev;
@@ -590,37 +598,53 @@ static int adsp_alloc_memory_region(struct qcom_adsp 
*adsp)
 
 static int adsp_assign_memory_region(struct qcom_adsp *adsp)
 {
-   struct reserved_mem *rmem = NULL;
-   struct qcom_scm_vmperm perm;
+   struct qcom_scm_vmperm perm[MAX_ASSIGN_COUNT];
struct device_node *node;
+   unsigned int perm_size;
+   int offset;
int ret;
 
if (!adsp->region_assign_idx)
return 0;
 
-   node = of_parse_phandle(adsp->dev->of_node, "memory-region", 
adsp->region_assign_idx);
-   if (node)
-   rmem = of_reserved_mem_lookup(node);
-   of_node_put(node);
-   if (!rmem) {
-   dev_err(adsp->dev, "unable to resolve shareable 
memory-region\n");
-   return -EINVAL;
-   }
+   for (offset = 0; offset < adsp->region_assign_count; ++offset) {
+   struct reserved_mem *rmem = NULL;
+
+   node = of_parse_phandle(adsp->dev->of_node, "memory-region",
+   adsp->region_assign_idx + offset);
+   if (node)
+   rmem = of_reserved_mem_lookup(node);
+   of_node_put(node);
+   if (!rmem) {
+   dev_err(adsp->dev, "unable to resolve shareable 
memory-region index %d\n",
+   offset);
+   return -EINVAL;
+   }
 
-   perm.vmid = QCOM_SCM_VMID_MSS_MSA;
-   perm.perm = QCOM_SCM_PERM_RW;
+   if (adsp->region_assign_shared)  {
+   perm[0].vmid = QCOM_SCM_VMID_HLOS;
+   perm[0].perm = QCOM_SCM_PERM_RW;
+   perm[1].vmid = adsp->region_assign_vmid;
+   perm[1].perm = QCOM_SCM_PERM_RW;
+   perm_size = 2;
+   } else {
+   perm[0].vmid = adsp->region_assign_vmid;
+   perm[0].perm = QCOM_SCM_PERM_RW;
+   perm_size = 1;
+   }
 
-   adsp->region_assign_phys = rmem->base;
-   adsp->region_assign_size = rmem->size;
-   adsp->region_assign_perms = BIT(QCOM_SCM_VMID_HLOS);
+   adsp->region_assign_phys[offset] = rmem->base;
+   adsp->region_assign_size[offset] = rmem->size;
+   adsp->region_assign_owners[offset] = BIT(QCOM_SCM_VMID_HLOS);
 
-   ret = qcom_scm_assign_mem(adsp->region_assign_phys,
- adsp->region_assign_size,
- &adsp->region_assign_perms,
- &perm, 1);
-   if (ret < 0) {
-   dev_err(adsp->dev, "assign memory failed\n");
-   return ret;
+   ret = qcom_scm_assign_mem(adsp->region_assign_phys[offset],
+ adsp->region_assign_size[offset],
+ &adsp->region_assign_owners[offset],
+ perm, perm_si

[PATCH v7 3/4] remoteproc: qcom: pas: Add SM8650 remoteproc support

2024-01-23 Thread Neil Armstrong

Add DSP Peripheral Authentication Service support for the SM8650 platform.

Reviewed-by: Dmitry Baryshkov 
Signed-off-by: Neil Armstrong 
---
 drivers/remoteproc/qcom_q6v5_pas.c | 50 ++
 1 file changed, 50 insertions(+)

diff --git a/drivers/remoteproc/qcom_q6v5_pas.c 
b/drivers/remoteproc/qcom_q6v5_pas.c
index 09e8ad9f08c4..d0b1f0f38347 100644
--- a/drivers/remoteproc/qcom_q6v5_pas.c
+++ b/drivers/remoteproc/qcom_q6v5_pas.c
@@ -1213,6 +1213,53 @@ static const struct adsp_data sc7280_wpss_resource = {
.ssctl_id = 0x19,
 };
 
+static const struct adsp_data sm8650_cdsp_resource = {
+   .crash_reason_smem = 601,
+   .firmware_name = "cdsp.mdt",
+   .dtb_firmware_name = "cdsp_dtb.mdt",
+   .pas_id = 18,
+   .dtb_pas_id = 0x25,
+   .minidump_id = 7,
+   .auto_boot = true,
+   .proxy_pd_names = (char*[]){
+   "cx",
+   "mxc",
+   "nsp",
+   NULL
+   },
+   .load_state = "cdsp",
+   .ssr_name = "cdsp",
+   .sysmon_name = "cdsp",
+   .ssctl_id = 0x17,
+   .region_assign_idx = 2,
+   .region_assign_count = 1,
+   .region_assign_shared = true,
+   .region_assign_vmid = QCOM_SCM_VMID_CDSP,
+};
+
+static const struct adsp_data sm8650_mpss_resource = {
+   .crash_reason_smem = 421,
+   .firmware_name = "modem.mdt",
+   .dtb_firmware_name = "modem_dtb.mdt",
+   .pas_id = 4,
+   .dtb_pas_id = 0x26,
+   .minidump_id = 3,
+   .auto_boot = false,
+   .decrypt_shutdown = true,
+   .proxy_pd_names = (char*[]){
+   "cx",
+   "mss",
+   NULL
+   },
+   .load_state = "modem",
+   .ssr_name = "mpss",
+   .sysmon_name = "modem",
+   .ssctl_id = 0x12,
+   .region_assign_idx = 2,
+   .region_assign_count = 3,
+   .region_assign_vmid = QCOM_SCM_VMID_MSS_MSA,
+};
+
 static const struct of_device_id adsp_of_match[] = {
{ .compatible = "qcom,msm8226-adsp-pil", .data = &adsp_resource_init},
{ .compatible = "qcom,msm8953-adsp-pil", .data = 
&msm8996_adsp_resource},
@@ -1268,6 +1315,9 @@ static const struct of_device_id adsp_of_match[] = {
{ .compatible = "qcom,sm8550-adsp-pas", .data = &sm8550_adsp_resource},
{ .compatible = "qcom,sm8550-cdsp-pas", .data = &sm8550_cdsp_resource},
{ .compatible = "qcom,sm8550-mpss-pas", .data = &sm8550_mpss_resource},
+   { .compatible = "qcom,sm8650-adsp-pas", .data = &sm8550_adsp_resource},
+   { .compatible = "qcom,sm8650-cdsp-pas", .data = &sm8650_cdsp_resource},
+   { .compatible = "qcom,sm8650-mpss-pas", .data = &sm8650_mpss_resource},
{ },
 };
 MODULE_DEVICE_TABLE(of, adsp_of_match);

-- 
2.34.1

[PATCH v7 4/4] arm64: dts: qcom: sm8650: add missing qlink_logging reserved memory for mpss

2024-01-23 Thread Neil Armstrong

The qlink_logging memory region is also used by the modem firmware,
add it to the reserved memories and add it to the MPSS memory regions.

Signed-off-by: Neil Armstrong 
---
 arch/arm64/boot/dts/qcom/sm8650.dtsi | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/qcom/sm8650.dtsi 
b/arch/arm64/boot/dts/qcom/sm8650.dtsi
index 2df77123a8c7..7a1cbc823306 100644
--- a/arch/arm64/boot/dts/qcom/sm8650.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8650.dtsi
@@ -525,6 +525,11 @@ qdss_mem: qdss@8280 {
no-map;
};
 
+   qlink_logging_mem: qlink-logging@8480 {
+   reg = <0 0x8480 0 0x20>;
+   no-map;
+   };
+
mpss_dsm_mem: mpss-dsm@86b0 {
reg = <0 0x86b0 0 0x490>;
no-map;
@@ -2627,7 +2632,8 @@ remoteproc_mpss: remoteproc@408 {
 "mss";
 
memory-region = <&mpss_mem>, <&q6_mpss_dtb_mem>,
-   <&mpss_dsm_mem>, <&mpss_dsm_mem_2>;
+   <&mpss_dsm_mem>, <&mpss_dsm_mem_2>,
+   <&qlink_logging_mem>;
 
qcom,qmp = <&aoss_qmp>;
 

-- 
2.34.1

[PATCH v4 0/7] support '%pd' and '%pD' for print file name

2024-01-23 Thread Ye Bin

During fault locating, the file name needs to be printed based on the
dentry/file address. The offset needs to be calculated each time, which
is troublesome. Similar to printk, kprobe supports printing file names
for dentry/file addresses.

Diff v4 vs v3:
1. Use "argv[i][idx + 3] == 'd'" instead of "argv[i][strlen(argv[i]) - 1] == 
'd'"
to judge print format in PATCH[4/7];

Diff v3 vs v2:
1. Return the index of where the suffix was found in str_has_suffix();

Diff v2 vs v1:
1. Use "%pd/%pD" print format instead of "pd/pD" print format;
2. Add "%pd/%pD" in README;
3. Expand "%pd/%pD" argument before parameter parsing;
4. Add more detail information in ftrace documentation;
5. Add test cases for new print format in selftests/ftrace;

Ye Bin (7):
  string.h: add str_has_suffix() helper for test string ends with
specify string
  tracing/probes: add traceprobe_expand_dentry_args() helper
  tracing/probes: support '%pd' type for print struct dentry's name
  tracing/probes: support '%pD' type for print struct file's name
  tracing: add new type "%pd/%pD" in readme_msg[]
  Documentation: tracing: add new type '%pd' and '%pD' for kprobe
  selftests/ftrace: add test cases for VFS type "%pd" and "%pD"

 Documentation/trace/kprobetrace.rst   |  6 +-
 include/linux/string.h| 28 +++
 kernel/trace/trace.c  |  2 +-
 kernel/trace/trace_kprobe.c   |  6 ++
 kernel/trace/trace_probe.c| 47 +++
 kernel/trace/trace_probe.h|  3 +
 .../ftrace/test.d/kprobe/kprobe_args_vfs.tc   | 79 +++
 7 files changed, 169 insertions(+), 2 deletions(-)
 create mode 100644 
tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc

-- 
2.31.1

[PATCH v4 5/7] tracing: add new type "%pd/%pD" in readme_msg[]

2024-01-23 Thread Ye Bin

Signed-off-by: Ye Bin 
---
 kernel/trace/trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2a7c6fd934e9..13197d3b86bd 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5745,7 +5745,7 @@ static const char readme_msg[] =
"\t   +|-[u](), \\imm-value, 
\\\"imm-string\"\n"
"\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, 
symbol,\n"
"\t   b@/, ustring,\n"
-   "\t   symstr, \\[\\]\n"
+   "\t   symstr, %pd/%pD \\[\\]\n"
 #ifdef CONFIG_HIST_TRIGGERS
"\tfield:  ;\n"
"\tstype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
-- 
2.31.1

[PATCH v4 4/7] tracing/probes: support '%pD' type for print struct file's name

2024-01-23 Thread Ye Bin

Similar to '%pD' for printk, use '%pD' for print struct file's name.

Signed-off-by: Ye Bin 
---
 kernel/trace/trace_probe.c | 41 --
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index cc8bd7ea5341..a664633137a0 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -12,6 +12,7 @@
 #define pr_fmt(fmt)"trace_probe: " fmt
 
 #include 
+#include 
 #include "trace_btf.h"
 
 #include "trace_probe.h"
@@ -1574,28 +1575,38 @@ int traceprobe_expand_dentry_args(int argc, const char 
*argv[], char *buf,
for (i = 0; i < argc; i++) {
size_t idx;
 
-   if (str_has_suffix(argv[i], ":%pd", &idx)) {
-   char *tmp = kstrdup(argv[i], GFP_KERNEL);
-   char *equal;
+   if (!str_has_suffix(argv[i], ":%pd", &idx) &&
+   !str_has_suffix(argv[i], ":%pD", &idx))
+   continue;
 
-   if (!tmp)
-   return -ENOMEM;
+   char *tmp = kstrdup(argv[i], GFP_KERNEL);
+   char *equal;
+
+   if (!tmp)
+   return -ENOMEM;
 
-   equal = strchr(tmp, '=');
-   if (equal)
-   *equal = '\0';
-   tmp[idx] = '\0';
+   equal = strchr(tmp, '=');
+   if (equal)
+   *equal = '\0';
+   tmp[idx] = '\0';
+   if (argv[i][idx + 3] == 'd')
ret = snprintf(buf + used, bufsize - used,
   "%s%s+0x0(+0x%zx(%s)):string",
   equal ? tmp : "", equal ? "=" : "",
   offsetof(struct dentry, d_name.name),
   equal ? equal + 1 : tmp);
-   kfree(tmp);
-   if (ret >= bufsize - used)
-   return -ENOMEM;
-   argv[i] = buf + used;
-   used += ret + 1;
-   }
+   else
+   ret = snprintf(buf + used, bufsize - used,
+  "%s%s+0x0(+0x%zx(+0x%zx(%s))):string",
+  equal ? tmp : "", equal ? "=" : "",
+  offsetof(struct dentry, d_name.name),
+  offsetof(struct file, f_path.dentry),
+  equal ? equal + 1 : tmp);
+   kfree(tmp);
+   if (ret >= bufsize - used)
+   return -ENOMEM;
+   argv[i] = buf + used;
+   used += ret + 1;
}
 
return 0;
-- 
2.31.1

[PATCH v4 1/7] string.h: add str_has_suffix() helper for test string ends with specify string

2024-01-23 Thread Ye Bin

str_has_suffix() is test string if ends with specify string, and also
this API may return the index of where the suffix was found.

Signed-off-by: Ye Bin 
---
 include/linux/string.h | 28 
 1 file changed, 28 insertions(+)

diff --git a/include/linux/string.h b/include/linux/string.h
index 433c207a01da..2fb0f22237fe 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -405,4 +405,32 @@ static __always_inline size_t str_has_prefix(const char 
*str, const char *prefix
return strncmp(str, prefix, len) == 0 ? len : 0;
 }
 
+/**
+ * str_has_suffix - Test if a string has a given suffix
+ * @str: The string to test
+ * @suffix: The string to see if @str ends with
+ * @index: The index into @str of where @suffix is if found (NULL to ignore)
+ *
+ * Returns:
+ * * strlen(@suffix) if @str ends with @suffix
+ * * 0 if @str does not end with @suffix
+ */
+static __always_inline size_t str_has_suffix(const char *str, const char 
*suffix,
+size_t *index)
+{
+   size_t len = strlen(suffix);
+   size_t str_len = strlen(str);
+
+   if (len > str_len)
+   return 0;
+
+   if (strncmp(str + str_len - len, suffix, len))
+   return 0;
+
+   if (index)
+   *index = str_len - len;
+
+   return len;
+}
+
 #endif /* _LINUX_STRING_H_ */
-- 
2.31.1

[PATCH v4 3/7] tracing/probes: support '%pd' type for print struct dentry's name

2024-01-23 Thread Ye Bin

Similar to '%pd' for printk, use '%pd' for print struct dentry's name.

Signed-off-by: Ye Bin 
---
 kernel/trace/trace_kprobe.c | 6 ++
 kernel/trace/trace_probe.h  | 1 +
 2 files changed, 7 insertions(+)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c4c6e0e0068b..00b74530fbad 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -779,6 +779,7 @@ static int __trace_kprobe_create(int argc, const char 
*argv[])
char buf[MAX_EVENT_NAME_LEN];
char gbuf[MAX_EVENT_NAME_LEN];
char abuf[MAX_BTF_ARGS_LEN];
+   char dbuf[MAX_DENTRY_ARGS_LEN];
struct traceprobe_parse_context ctx = { .flags = TPARG_FL_KERNEL };
 
switch (argv[0][0]) {
@@ -930,6 +931,11 @@ static int __trace_kprobe_create(int argc, const char 
*argv[])
argv = new_argv;
}
 
+   ret = traceprobe_expand_dentry_args(argc, argv, dbuf,
+   MAX_DENTRY_ARGS_LEN);
+   if (ret)
+   goto out;
+
/* setup a probe */
tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,
argc, is_return);
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 553371a4e0b1..d9c053824975 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -34,6 +34,7 @@
 #define MAX_ARRAY_LEN  64
 #define MAX_ARG_NAME_LEN   32
 #define MAX_BTF_ARGS_LEN   128
+#define MAX_DENTRY_ARGS_LEN256
 #define MAX_STRING_SIZEPATH_MAX
 #define MAX_ARG_BUF_LEN(MAX_TRACE_ARGS * MAX_ARG_NAME_LEN)
 
-- 
2.31.1

[PATCH v4 2/7] tracing/probes: add traceprobe_expand_dentry_args() helper

2024-01-23 Thread Ye Bin

Add traceprobe_expand_dentry_args() to expand dentry args. this API is
prepare to support "%pd" print format for kprobe.

Signed-off-by: Ye Bin 
---
 kernel/trace/trace_probe.c | 36 
 kernel/trace/trace_probe.h |  2 ++
 2 files changed, 38 insertions(+)

diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 4dc74d73fc1d..cc8bd7ea5341 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -1565,6 +1565,42 @@ const char **traceprobe_expand_meta_args(int argc, const 
char *argv[],
return ERR_PTR(ret);
 }
 
+int traceprobe_expand_dentry_args(int argc, const char *argv[], char *buf,
+ int bufsize)
+{
+   int i, used, ret;
+
+   used = 0;
+   for (i = 0; i < argc; i++) {
+   size_t idx;
+
+   if (str_has_suffix(argv[i], ":%pd", &idx)) {
+   char *tmp = kstrdup(argv[i], GFP_KERNEL);
+   char *equal;
+
+   if (!tmp)
+   return -ENOMEM;
+
+   equal = strchr(tmp, '=');
+   if (equal)
+   *equal = '\0';
+   tmp[idx] = '\0';
+   ret = snprintf(buf + used, bufsize - used,
+  "%s%s+0x0(+0x%zx(%s)):string",
+  equal ? tmp : "", equal ? "=" : "",
+  offsetof(struct dentry, d_name.name),
+  equal ? equal + 1 : tmp);
+   kfree(tmp);
+   if (ret >= bufsize - used)
+   return -ENOMEM;
+   argv[i] = buf + used;
+   used += ret + 1;
+   }
+   }
+
+   return 0;
+}
+
 void traceprobe_finish_parse(struct traceprobe_parse_context *ctx)
 {
clear_btf_context(ctx);
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 850d9ecb6765..553371a4e0b1 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -402,6 +402,8 @@ extern int traceprobe_parse_probe_arg(struct trace_probe 
*tp, int i,
 const char **traceprobe_expand_meta_args(int argc, const char *argv[],
 int *new_argc, char *buf, int bufsize,
 struct traceprobe_parse_context *ctx);
+extern int traceprobe_expand_dentry_args(int argc, const char *argv[], char 
*buf,
+int bufsize);
 
 extern int traceprobe_update_arg(struct probe_arg *arg);
 extern void traceprobe_free_probe_arg(struct probe_arg *arg);
-- 
2.31.1

[PATCH v4 6/7] Documentation: tracing: add new type '%pd' and '%pD' for kprobe

2024-01-23 Thread Ye Bin

Similar to printk() '%pd' is for fetch dentry's name from struct dentry's
pointer, and '%pD' is for fetch file's name from struct file's pointer.

Signed-off-by: Ye Bin 
---
 Documentation/trace/kprobetrace.rst | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Documentation/trace/kprobetrace.rst 
b/Documentation/trace/kprobetrace.rst
index bf9cecb69fc9..a1d12d65a8dc 100644
--- a/Documentation/trace/kprobetrace.rst
+++ b/Documentation/trace/kprobetrace.rst
@@ -58,7 +58,8 @@ Synopsis of kprobe_events
   NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
   FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
  (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
- (x8/x16/x32/x64), "char", "string", "ustring", "symbol", 
"symstr"
+ (x8/x16/x32/x64), VFS layer common type(%pd/%pD) for print
+  file name, "char", "string", "ustring", "symbol", "symstr"
   and bitfield are supported.
 
   (\*1) only for the probe on function entry (offs == 0). Note, this argument 
access
@@ -113,6 +114,9 @@ With 'symstr' type, you can filter the event with wildcard 
pattern of the
 symbols, and you don't need to solve symbol name by yourself.
 For $comm, the default type is "string"; any other type is invalid.
 
+VFS layer common type(%pd/%pD) is a special type, which fetches dentry's or
+file's name from struct dentry's pointer or struct file's pointer.
+
 .. _user_mem_access:
 
 User Memory Access
-- 
2.31.1

[PATCH v4 7/7] selftests/ftrace: add test cases for VFS type "%pd" and "%pD"

2024-01-23 Thread Ye Bin

This patch adds test cases for new print format type "%pd/%pD".The test cases
test the following items:
1. Test README if add "%pd/%pD" type;
2. Test "%pd" type for dput();
3. Test "%pD" type for vfs_read();

Signed-off-by: Ye Bin 
---
 .../ftrace/test.d/kprobe/kprobe_args_vfs.tc   | 79 +++
 1 file changed, 79 insertions(+)
 create mode 100644 
tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc 
b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc
new file mode 100644
index ..1d8edd294dd6
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc
@@ -0,0 +1,79 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event VFS type argument
+# requires: kprobe_events
+
+case `uname -m` in
+x86_64)
+  ARG1=%di
+;;
+i[3456]86)
+  ARG1=%ax
+;;
+aarch64)
+  ARG1=%x0
+;;
+arm*)
+  ARG1=%r0
+;;
+ppc64*)
+  ARG1=%r3
+;;
+ppc*)
+  ARG1=%r3
+;;
+s390*)
+  ARG1=%r2
+;;
+mips*)
+  ARG1=%r4
+;;
+loongarch*)
+  ARG1=%r4
+;;
+riscv*)
+  ARG1=%a0
+;;
+*)
+  echo "Please implement other architecture here"
+  exit_untested
+esac
+
+: "Test argument %pd/%pD in README"
+grep -q "%pd/%pD" README
+
+: "Test argument %pd with name"
+echo "p:testprobe dput name=${ARG1}:%pd" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1" events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "dput" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
+
+: "Test argument %pd without name"
+echo "p:testprobe dput ${ARG1}:%pd" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1" events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "dput" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
+
+: "Test argument %pD with name"
+echo "p:testprobe vfs_read name=${ARG1}:%pD" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1" events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "vfs_read" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
+
+: "Test argument %pD without name"
+echo "p:testprobe vfs_read ${ARG1}:%pD" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1"  events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "vfs_read" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
-- 
2.31.1

Re: [PATCH v7 3/4] remoteproc: qcom: pas: Add SM8650 remoteproc support

2024-01-23 Thread Mukesh Ojha





On 1/23/2024 2:21 PM, Neil Armstrong wrote:

Add DSP Peripheral Authentication Service support for the SM8650 platform.

Reviewed-by: Dmitry Baryshkov 
Signed-off-by: Neil Armstrong 
---
  drivers/remoteproc/qcom_q6v5_pas.c | 50 ++
  1 file changed, 50 insertions(+)

diff --git a/drivers/remoteproc/qcom_q6v5_pas.c 
b/drivers/remoteproc/qcom_q6v5_pas.c
index 09e8ad9f08c4..d0b1f0f38347 100644
--- a/drivers/remoteproc/qcom_q6v5_pas.c
+++ b/drivers/remoteproc/qcom_q6v5_pas.c
@@ -1213,6 +1213,53 @@ static const struct adsp_data sc7280_wpss_resource = {
.ssctl_id = 0x19,
  };
  
+static const struct adsp_data sm8650_cdsp_resource = {

+   .crash_reason_smem = 601,
+   .firmware_name = "cdsp.mdt",
+   .dtb_firmware_name = "cdsp_dtb.mdt",
+   .pas_id = 18,
+   .dtb_pas_id = 0x25,
+   .minidump_id = 7,
+   .auto_boot = true,
+   .proxy_pd_names = (char*[]){
+   "cx",
+   "mxc",
+   "nsp",
+   NULL
+   },
+   .load_state = "cdsp",
+   .ssr_name = "cdsp",
+   .sysmon_name = "cdsp",
+   .ssctl_id = 0x17,
+   .region_assign_idx = 2,
+   .region_assign_count = 1,
+   .region_assign_shared = true,
+   .region_assign_vmid = QCOM_SCM_VMID_CDSP,
+};
+
+static const struct adsp_data sm8650_mpss_resource = {
+   .crash_reason_smem = 421,
+   .firmware_name = "modem.mdt",
+   .dtb_firmware_name = "modem_dtb.mdt",
+   .pas_id = 4,
+   .dtb_pas_id = 0x26,
+   .minidump_id = 3,
+   .auto_boot = false,
+   .decrypt_shutdown = true,
+   .proxy_pd_names = (char*[]){
+   "cx",
+   "mss",
+   NULL
+   },
+   .load_state = "modem",
+   .ssr_name = "mpss",
+   .sysmon_name = "modem",
+   .ssctl_id = 0x12,
+   .region_assign_idx = 2,
+   .region_assign_count = 3,


I see this has changed from 2 to 3 after qlink logging addition;


+   .region_assign_vmid = QCOM_SCM_VMID_MSS_MSA,
+};
+
  static const struct of_device_id adsp_of_match[] = {
{ .compatible = "qcom,msm8226-adsp-pil", .data = &adsp_resource_init},
{ .compatible = "qcom,msm8953-adsp-pil", .data = 
&msm8996_adsp_resource},
@@ -1268,6 +1315,9 @@ static const struct of_device_id adsp_of_match[] = {
{ .compatible = "qcom,sm8550-adsp-pas", .data = &sm8550_adsp_resource},
{ .compatible = "qcom,sm8550-cdsp-pas", .data = &sm8550_cdsp_resource},
{ .compatible = "qcom,sm8550-mpss-pas", .data = &sm8550_mpss_resource},
+   { .compatible = "qcom,sm8650-adsp-pas", .data = &sm8550_adsp_resource},


Same as sm8550;

+   { .compatible = "qcom,sm8650-cdsp-pas", .data = &sm8650_cdsp_resource},
+   { .compatible = "qcom,sm8650-mpss-pas", .data = &sm8650_mpss_resource},


LGTM,

Acked-by: Mukesh Ojha 

-Mukesh


{ },
  };
  MODULE_DEVICE_TABLE(of, adsp_of_match);

Re: [PATCH v7 4/4] arm64: dts: qcom: sm8650: add missing qlink_logging reserved memory for mpss

2024-01-23 Thread Mukesh Ojha





On 1/23/2024 2:21 PM, Neil Armstrong wrote:

The qlink_logging memory region is also used by the modem firmware,
add it to the reserved memories and add it to the MPSS memory regions.

Signed-off-by: Neil Armstrong 


LGTM,

Reviewed-by: Mukesh Ojha 

-Mukesh


---
  arch/arm64/boot/dts/qcom/sm8650.dtsi | 8 +++-
  1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/qcom/sm8650.dtsi 
b/arch/arm64/boot/dts/qcom/sm8650.dtsi
index 2df77123a8c7..7a1cbc823306 100644
--- a/arch/arm64/boot/dts/qcom/sm8650.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8650.dtsi
@@ -525,6 +525,11 @@ qdss_mem: qdss@8280 {
no-map;
};
  
+		qlink_logging_mem: qlink-logging@8480 {

+   reg = <0 0x8480 0 0x20>;
+   no-map;
+   };
+
mpss_dsm_mem: mpss-dsm@86b0 {
reg = <0 0x86b0 0 0x490>;
no-map;
@@ -2627,7 +2632,8 @@ remoteproc_mpss: remoteproc@408 {
 "mss";
  
  			memory-region = <&mpss_mem>, <&q6_mpss_dtb_mem>,

-   <&mpss_dsm_mem>, <&mpss_dsm_mem_2>;
+   <&mpss_dsm_mem>, <&mpss_dsm_mem_2>,
+   <&qlink_logging_mem>;
  
  			qcom,qmp = <&aoss_qmp>;

[PATCH net-next v3 2/5] page_frag: unify gfp bits for order 3 page allocation

2024-01-23 Thread Yunsheng Lin

Currently there seems to be three page frag implementions
which all try to allocate order 3 page, if that fails, it
then fail back to allocate order 0 page, and each of them
all allow order 3 page allocation to fail under certain
condition by using specific gfp bits.

The gfp bits for order 3 page allocation are different
between different implementation, __GFP_NOMEMALLOC is
or'd to forbid access to emergency reserves memory for
__page_frag_cache_refill(), but it is not or'd in other
implementions, __GFP_DIRECT_RECLAIM is masked off to avoid
direct reclaim in skb_page_frag_refill(), but it is not
masked off in __page_frag_cache_refill().

This patch unifies the gfp bits used between different
implementions by or'ing __GFP_NOMEMALLOC and masking off
__GFP_DIRECT_RECLAIM for order 3 page allocation to avoid
possible pressure for mm.

Signed-off-by: Yunsheng Lin 
Reviewed-by: Alexander Duyck 
CC: Alexander Duyck 
---
 drivers/vhost/net.c | 2 +-
 mm/page_alloc.c | 4 ++--
 net/core/sock.c | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index f2ed7167c848..e574e21cc0ca 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -670,7 +670,7 @@ static bool vhost_net_page_frag_refill(struct vhost_net 
*net, unsigned int sz,
/* Avoid direct reclaim but allow kswapd to wake */
pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
  __GFP_COMP | __GFP_NOWARN |
- __GFP_NORETRY,
+ __GFP_NORETRY | __GFP_NOMEMALLOC,
  SKB_FRAG_PAGE_ORDER);
if (likely(pfrag->page)) {
pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c0f7e67c4250..636145c29f70 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4685,8 +4685,8 @@ static struct page *__page_frag_cache_refill(struct 
page_frag_cache *nc,
gfp_t gfp = gfp_mask;
 
 #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-   gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY |
-   __GFP_NOMEMALLOC;
+   gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) |  __GFP_COMP |
+  __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
PAGE_FRAG_CACHE_MAX_ORDER);
nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
diff --git a/net/core/sock.c b/net/core/sock.c
index 158dbdebce6a..d4bc4269d7d7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2908,7 +2908,7 @@ bool skb_page_frag_refill(unsigned int sz, struct 
page_frag *pfrag, gfp_t gfp)
/* Avoid direct reclaim but allow kswapd to wake */
pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
  __GFP_COMP | __GFP_NOWARN |
- __GFP_NORETRY,
+ __GFP_NORETRY | __GFP_NOMEMALLOC,
  SKB_FRAG_PAGE_ORDER);
if (likely(pfrag->page)) {
pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
-- 
2.33.0

[PATCH net-next v3 4/5] vhost/net: remove vhost_net_page_frag_refill()

2024-01-23 Thread Yunsheng Lin

The page frag in vhost_net_page_frag_refill() uses the
'struct page_frag' from skb_page_frag_refill(), but it's
implementation is similar to page_frag_alloc_align() now.

This patch removes vhost_net_page_frag_refill() by using
'struct page_frag_cache' instead of 'struct page_frag',
and allocating frag using page_frag_alloc_align().

The added benefit is that not only unifying the page frag
implementation a little, but also having about 0.5% performance
boost testing by using the vhost_net_test introduced in the
last patch.

Signed-off-by: Yunsheng Lin 
Acked-by: Jason Wang 
---
 drivers/vhost/net.c | 91 ++---
 1 file changed, 27 insertions(+), 64 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index e574e21cc0ca..4b2fcb228a0a 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -141,10 +141,8 @@ struct vhost_net {
unsigned tx_zcopy_err;
/* Flush in progress. Protected by tx vq lock. */
bool tx_flush;
-   /* Private page frag */
-   struct page_frag page_frag;
-   /* Refcount bias of page frag */
-   int refcnt_bias;
+   /* Private page frag cache */
+   struct page_frag_cache pf_cache;
 };
 
 static unsigned vhost_net_zcopy_mask __read_mostly;
@@ -655,41 +653,6 @@ static bool tx_can_batch(struct vhost_virtqueue *vq, 
size_t total_len)
   !vhost_vq_avail_empty(vq->dev, vq);
 }
 
-static bool vhost_net_page_frag_refill(struct vhost_net *net, unsigned int sz,
-  struct page_frag *pfrag, gfp_t gfp)
-{
-   if (pfrag->page) {
-   if (pfrag->offset + sz <= pfrag->size)
-   return true;
-   __page_frag_cache_drain(pfrag->page, net->refcnt_bias);
-   }
-
-   pfrag->offset = 0;
-   net->refcnt_bias = 0;
-   if (SKB_FRAG_PAGE_ORDER) {
-   /* Avoid direct reclaim but allow kswapd to wake */
-   pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
- __GFP_COMP | __GFP_NOWARN |
- __GFP_NORETRY | __GFP_NOMEMALLOC,
- SKB_FRAG_PAGE_ORDER);
-   if (likely(pfrag->page)) {
-   pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
-   goto done;
-   }
-   }
-   pfrag->page = alloc_page(gfp);
-   if (likely(pfrag->page)) {
-   pfrag->size = PAGE_SIZE;
-   goto done;
-   }
-   return false;
-
-done:
-   net->refcnt_bias = USHRT_MAX;
-   page_ref_add(pfrag->page, USHRT_MAX - 1);
-   return true;
-}
-
 #define VHOST_NET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
 
 static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
@@ -699,7 +662,6 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue 
*nvq,
struct vhost_net *net = container_of(vq->dev, struct vhost_net,
 dev);
struct socket *sock = vhost_vq_get_backend(vq);
-   struct page_frag *alloc_frag = &net->page_frag;
struct virtio_net_hdr *gso;
struct xdp_buff *xdp = &nvq->xdp[nvq->batched_xdp];
struct tun_xdp_hdr *hdr;
@@ -710,6 +672,7 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue 
*nvq,
int sock_hlen = nvq->sock_hlen;
void *buf;
int copied;
+   int ret;
 
if (unlikely(len < nvq->sock_hlen))
return -EFAULT;
@@ -719,18 +682,17 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue 
*nvq,
return -ENOSPC;
 
buflen += SKB_DATA_ALIGN(len + pad);
-   alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES);
-   if (unlikely(!vhost_net_page_frag_refill(net, buflen,
-alloc_frag, GFP_KERNEL)))
+   buf = page_frag_alloc_align(&net->pf_cache, buflen, GFP_KERNEL,
+   SMP_CACHE_BYTES);
+   if (unlikely(!buf))
return -ENOMEM;
 
-   buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
-   copied = copy_page_from_iter(alloc_frag->page,
-alloc_frag->offset +
-offsetof(struct tun_xdp_hdr, gso),
-sock_hlen, from);
-   if (copied != sock_hlen)
-   return -EFAULT;
+   copied = copy_from_iter(buf + offsetof(struct tun_xdp_hdr, gso),
+   sock_hlen, from);
+   if (copied != sock_hlen) {
+   ret = -EFAULT;
+   goto err;
+   }
 
hdr = buf;
gso = &hdr->gso;
@@ -743,27 +705,30 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue 
*nvq,
   vhost16_to_cpu(vq, gso->csum_start) +
   vhost16_to_cpu(vq, gso->csum_offset) + 2);
 
-

[PATCH net-next v3 5/5] tools: virtio: introduce vhost_net_test

2024-01-23 Thread Yunsheng Lin

introduce vhost_net_test basing on virtio_test to test
vhost_net changing in the kernel.

Signed-off-by: Yunsheng Lin 
---
 tools/virtio/.gitignore   |   1 +
 tools/virtio/Makefile |   8 +-
 tools/virtio/vhost_net_test.c | 576 ++
 3 files changed, 582 insertions(+), 3 deletions(-)
 create mode 100644 tools/virtio/vhost_net_test.c

diff --git a/tools/virtio/.gitignore b/tools/virtio/.gitignore
index 9934d48d9a55..7e47b281c442 100644
--- a/tools/virtio/.gitignore
+++ b/tools/virtio/.gitignore
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 *.d
 virtio_test
+vhost_net_test
 vringh_test
 virtio-trace/trace-agent
diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile
index d128925980e0..e25e99c1c3b7 100644
--- a/tools/virtio/Makefile
+++ b/tools/virtio/Makefile
@@ -1,8 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 all: test mod
-test: virtio_test vringh_test
+test: virtio_test vringh_test vhost_net_test
 virtio_test: virtio_ring.o virtio_test.o
 vringh_test: vringh_test.o vringh.o virtio_ring.o
+vhost_net_test: virtio_ring.o vhost_net_test.o
 
 try-run = $(shell set -e;  \
if ($(1)) >/dev/null 2>&1;  \
@@ -49,6 +50,7 @@ oot-clean: OOT_BUILD+=clean
 
 .PHONY: all test mod clean vhost oot oot-clean oot-build
 clean:
-   ${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \
-  vhost_test/Module.symvers vhost_test/modules.order *.d
+   ${RM} *.o vringh_test virtio_test vhost_net_test vhost_test/*.o \
+  vhost_test/.*.cmd vhost_test/Module.symvers \
+  vhost_test/modules.order *.d
 -include *.d
diff --git a/tools/virtio/vhost_net_test.c b/tools/virtio/vhost_net_test.c
new file mode 100644
index ..e336792a0d77
--- /dev/null
+++ b/tools/virtio/vhost_net_test.c
@@ -0,0 +1,576 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define RANDOM_BATCH   -1
+#define HDR_LEN12
+#define TEST_BUF_LEN   256
+#define TEST_PTYPE ETH_P_LOOPBACK
+
+/* Used by implementation of kmalloc() in tools/virtio/linux/kernel.h */
+void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
+
+struct vq_info {
+   int kick;
+   int call;
+   int idx;
+   long started;
+   long completed;
+   struct pollfd fds;
+   void *ring;
+   /* copy used for control */
+   struct vring vring;
+   struct virtqueue *vq;
+};
+
+struct vdev_info {
+   struct virtio_device vdev;
+   int control;
+   struct vq_info vqs[2];
+   int nvqs;
+   void *buf;
+   size_t buf_size;
+   char *test_buf;
+   char *res_buf;
+   struct vhost_memory *mem;
+   int sock;
+   int ifindex;
+   unsigned char mac[ETHER_ADDR_LEN];
+};
+
+static int tun_alloc(struct vdev_info *dev)
+{
+   struct ifreq ifr;
+   int len = HDR_LEN;
+   int fd, e;
+
+   fd = open("/dev/net/tun", O_RDWR);
+   if (fd < 0) {
+   perror("Cannot open /dev/net/tun");
+   return fd;
+   }
+
+   memset(&ifr, 0, sizeof(ifr));
+
+   ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
+   snprintf(ifr.ifr_name, IFNAMSIZ, "tun_%d", getpid());
+
+   e = ioctl(fd, TUNSETIFF, &ifr);
+   if (e < 0) {
+   perror("ioctl[TUNSETIFF]");
+   close(fd);
+   return e;
+   }
+
+   e = ioctl(fd, TUNSETVNETHDRSZ, &len);
+   if (e < 0) {
+   perror("ioctl[TUNSETVNETHDRSZ]");
+   close(fd);
+   return e;
+   }
+
+   e = ioctl(fd, SIOCGIFHWADDR, &ifr);
+   if (e < 0) {
+   perror("ioctl[SIOCGIFHWADDR]");
+   close(fd);
+   return e;
+   }
+
+   memcpy(dev->mac, &ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN);
+   return fd;
+}
+
+static void vdev_create_socket(struct vdev_info *dev)
+{
+   struct ifreq ifr;
+
+   dev->sock = socket(AF_PACKET, SOCK_RAW, htons(TEST_PTYPE));
+   assert(dev->sock != -1);
+
+   snprintf(ifr.ifr_name, IFNAMSIZ, "tun_%d", getpid());
+   assert(ioctl(dev->sock, SIOCGIFINDEX, &ifr) >= 0);
+
+   dev->ifindex = ifr.ifr_ifindex;
+
+   /* Set the flags that bring the device up */
+   assert(ioctl(dev->sock, SIOCGIFFLAGS, &ifr) >= 0);
+   ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
+   assert(ioctl(dev->sock, SIOCSIFFLAGS, &ifr) >= 0);
+}
+
+static void vdev_send_packet(struct vdev_info *dev)
+{
+   char *sendbuf = dev->test_buf + HDR_LEN;
+   struct sockaddr_ll saddrll = {0};
+   int sockfd = dev->sock;
+   int ret;
+
+   saddrll.sll_family = PF_PACKET;
+   saddrll.sll_ifindex = dev->ifindex;
+   saddrll.sll_halen = ETH_ALEN;
+   sadd

[PATCH v12 0/6] Introducing trace buffer mapping by user-space

2024-01-23 Thread Vincent Donnefort

The tracing ring-buffers can be stored on disk or sent to network
without any copy via splice. However the later doesn't allow real time
processing of the traces. A solution is to give userspace direct access
to the ring-buffer pages via a mapping. An application can now become a
consumer of the ring-buffer, in a similar fashion to what trace_pipe
offers.

Support for this new feature can already be found in libtracefs from
version 1.8, when built with EXTRA_CFLAGS=-DFORCE_MMAP_ENABLE.

Vincent

v11 -> v12:
  * Fix code sample mmap bug.
  * Add logging in sample code.
  * Reset tracer in selftest.
  * Add a refcount for the snapshot users.
  * Prevent mapping when there are snapshot users and vice versa.
  * Refine the meta-page.
  * Fix types in the meta-page.
  * Collect Reviewed-by.

v10 -> v11:
  * Add Documentation and code sample.
  * Add a selftest.
  * Move all the update to the meta-page into a single
rb_update_meta_page().
  * rb_update_meta_page() is now called from
ring_buffer_map_get_reader() to fix NOBLOCK callers.
  * kerneldoc for struct trace_meta_page.
  * Add a patch to zero all the ring-buffer allocations.

v9 -> v10:
  * Refactor rb_update_meta_page()
  * In-loop declaration for foreach_subbuf_page()
  * Check for cpu_buffer->mapped overflow

v8 -> v9:
  * Fix the unlock path in ring_buffer_map()
  * Fix cpu_buffer cast with rb_work_rq->is_cpu_buffer
  * Rebase on linux-trace/for-next (3cb3091138ca0921c4569bcf7ffa062519639b6a)

v7 -> v8:
  * Drop the subbufs renaming into bpages
  * Use subbuf as a name when relevant

v6 -> v7:
  * Rebase onto lore.kernel.org/lkml/20231215175502.106587...@goodmis.org/
  * Support for subbufs
  * Rename subbufs into bpages

v5 -> v6:
  * Rebase on next-20230802.
  * (unsigned long) -> (void *) cast for virt_to_page().
  * Add a wait for the GET_READER_PAGE ioctl.
  * Move writer fields update (overrun/pages_lost/entries/pages_touched)
in the irq_work.
  * Rearrange id in struct buffer_page.
  * Rearrange the meta-page.
  * ring_buffer_meta_page -> trace_buffer_meta_page.
  * Add meta_struct_len into the meta-page.

v4 -> v5:
  * Trivial rebase onto 6.5-rc3 (previously 6.4-rc3)

v3 -> v4:
  * Add to the meta-page:
   - pages_lost / pages_read (allow to compute how full is the
 ring-buffer)
   - read (allow to compute how many entries can be read)
   - A reader_page struct.
  * Rename ring_buffer_meta_header -> ring_buffer_meta
  * Rename ring_buffer_get_reader_page -> ring_buffer_map_get_reader_page
  * Properly consume events on ring_buffer_map_get_reader_page() with
rb_advance_reader().

v2 -> v3:
  * Remove data page list (for non-consuming read)
** Implies removing order > 0 meta-page
  * Add a new meta page field ->read
  * Rename ring_buffer_meta_page_header into ring_buffer_meta_header

v1 -> v2:
  * Hide data_pages from the userspace struct
  * Fix META_PAGE_MAX_PAGES
  * Support for order > 0 meta-page
  * Add missing page->mapping.

Vincent Donnefort (6):
  ring-buffer: Zero ring-buffer sub-buffers
  ring-buffer: Introducing ring-buffer mapping functions
  tracing: Add snapshot refcount
  tracing: Allow user-space mapping of the ring-buffer
  Documentation: tracing: Add ring-buffer mapping
  ring-buffer/selftest: Add ring-buffer mapping test

 Documentation/trace/index.rst |   1 +
 Documentation/trace/ring-buffer-map.rst   | 106 ++
 include/linux/ring_buffer.h   |   7 +
 include/uapi/linux/trace_mmap.h   |  46 +++
 kernel/trace/ring_buffer.c| 338 +-
 kernel/trace/trace.c  | 208 ++-
 kernel/trace/trace.h  |   6 +
 kernel/trace/trace_events_trigger.c   |  57 ++-
 tools/testing/selftests/ring-buffer/Makefile  |   8 +
 tools/testing/selftests/ring-buffer/config|   1 +
 .../testing/selftests/ring-buffer/map_test.c  | 188 ++
 11 files changed, 925 insertions(+), 41 deletions(-)
 create mode 100644 Documentation/trace/ring-buffer-map.rst
 create mode 100644 include/uapi/linux/trace_mmap.h
 create mode 100644 tools/testing/selftests/ring-buffer/Makefile
 create mode 100644 tools/testing/selftests/ring-buffer/config
 create mode 100644 tools/testing/selftests/ring-buffer/map_test.c


base-commit: 4f1991a92cfe89096b2d1f5583a2e093bdd55c37
-- 
2.43.0.429.g432eaa2c6b-goog

[PATCH v12 1/6] ring-buffer: Zero ring-buffer sub-buffers

2024-01-23 Thread Vincent Donnefort

In preparation for the ring-buffer memory mapping where each subbuf will
be accessible to user-space, zero all the page allocations.

Signed-off-by: Vincent Donnefort 
Reviewed-by: Masami Hiramatsu (Google) 

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 173d2595ce2d..db73e326fa04 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1466,7 +1466,8 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu 
*cpu_buffer,
 
list_add(&bpage->list, pages);
 
-   page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), mflags,
+   page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
+   mflags | __GFP_ZERO,
cpu_buffer->buffer->subbuf_order);
if (!page)
goto free_pages;
@@ -1551,7 +1552,8 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long 
nr_pages, int cpu)
 
cpu_buffer->reader_page = bpage;
 
-   page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 
cpu_buffer->buffer->subbuf_order);
+   page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_ZERO,
+   cpu_buffer->buffer->subbuf_order);
if (!page)
goto fail_free_reader;
bpage->page = page_address(page);
@@ -5525,7 +5527,8 @@ ring_buffer_alloc_read_page(struct trace_buffer *buffer, 
int cpu)
if (bpage->data)
goto out;
 
-   page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_NORETRY,
+   page = alloc_pages_node(cpu_to_node(cpu),
+   GFP_KERNEL | __GFP_NORETRY | __GFP_ZERO,
cpu_buffer->buffer->subbuf_order);
if (!page) {
kfree(bpage);
-- 
2.43.0.429.g432eaa2c6b-goog

[PATCH v12 2/6] ring-buffer: Introducing ring-buffer mapping functions

2024-01-23 Thread Vincent Donnefort

In preparation for allowing the user-space to map a ring-buffer, add
a set of mapping functions:

  ring_buffer_{map,unmap}()
  ring_buffer_map_fault()

And controls on the ring-buffer:

  ring_buffer_map_get_reader()  /* swap reader and head */

Mapping the ring-buffer also involves:

  A unique ID for each subbuf of the ring-buffer, currently they are
  only identified through their in-kernel VA.

  A meta-page, where are stored ring-buffer statistics and a
  description for the current reader

The linear mapping exposes the meta-page, and each subbuf of the
ring-buffer, ordered following their unique ID, assigned during the
first mapping.

Once mapped, no subbuf can get in or out of the ring-buffer: the buffer
size will remain unmodified and the splice enabling functions will in
reality simply memcpy the data instead of swapping subbufs.

Signed-off-by: Vincent Donnefort 

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index fa802db216f9..0841ba8bab14 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -6,6 +6,8 @@
 #include 
 #include 
 
+#include 
+
 struct trace_buffer;
 struct ring_buffer_iter;
 
@@ -221,4 +223,9 @@ int trace_rb_cpu_prepare(unsigned int cpu, struct 
hlist_node *node);
 #define trace_rb_cpu_prepare   NULL
 #endif
 
+int ring_buffer_map(struct trace_buffer *buffer, int cpu);
+int ring_buffer_unmap(struct trace_buffer *buffer, int cpu);
+struct page *ring_buffer_map_fault(struct trace_buffer *buffer, int cpu,
+  unsigned long pgoff);
+int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu);
 #endif /* _LINUX_RING_BUFFER_H */
diff --git a/include/uapi/linux/trace_mmap.h b/include/uapi/linux/trace_mmap.h
new file mode 100644
index ..5468afc94be7
--- /dev/null
+++ b/include/uapi/linux/trace_mmap.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _TRACE_MMAP_H_
+#define _TRACE_MMAP_H_
+
+#include 
+
+/**
+ * struct trace_buffer_meta - Ring-buffer Meta-page description
+ * @meta_page_size:Size of this meta-page.
+ * @meta_struct_len:   Size of this structure.
+ * @subbuf_size:   Size of each subbuf, including the header.
+ * @nr_subbufs:Number of subbfs in the ring-buffer.
+ * @reader.lost_events:Number of events lost at the time of the reader 
swap.
+ * @reader.id: subbuf ID of the current reader. From 0 to @nr_subbufs 
- 1
+ * @reader.read:   Number of bytes read on the reader subbuf.
+ * @entries:   Number of entries in the ring-buffer.
+ * @overrun:   Number of entries lost in the ring-buffer.
+ * @read:  Number of entries that have been read.
+ * @subbufs_touched:   Number of subbufs that have been filled.
+ * @subbufs_lost:  Number of subbufs lost to overrun.
+ * @subbufs_read:  Number of subbufs that have been read.
+ */
+struct trace_buffer_meta {
+   __u32   meta_page_size;
+   __u32   meta_struct_len;
+
+   __u32   subbuf_size;
+   __u32   nr_subbufs;
+
+   struct {
+   __u64   lost_events;
+   __u32   id;
+   __u32   read;
+   } reader;
+
+   __u64   entries;
+   __u64   overrun;
+   __u64   read;
+
+   __u64   subbufs_touched;
+   __u64   subbufs_lost;
+};
+
+#endif /* _TRACE_MMAP_H_ */
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index db73e326fa04..e7b39f7a1c74 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -338,6 +338,7 @@ struct buffer_page {
local_t  entries;   /* entries on this page */
unsigned longreal_end;  /* real end of data */
unsigned order; /* order of the page */
+   u32  id;/* ID for external mapping */
struct buffer_data_page *page;  /* Actual data page */
 };
 
@@ -484,6 +485,12 @@ struct ring_buffer_per_cpu {
u64 read_stamp;
/* pages removed since last reset */
unsigned long   pages_removed;
+
+   int mapped;
+   struct mutexmapping_lock;
+   unsigned long   *subbuf_ids;/* ID to addr */
+   struct trace_buffer_meta*meta_page;
+
/* ring buffer pages to update, > 0 to add, < 0 to remove */
longnr_pages_to_update;
struct list_headnew_pages; /* new pages to add */
@@ -1542,6 +1549,7 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long 
nr_pages, int cpu)
init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters);
init_waitqueue_head(&cpu_buffer->irq_work.waiters);
init_waitqueue_head(&cpu_buffer->irq_work.full_waiters);
+   mutex_init(&cpu_buffer->mapping_lock);
 
bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_lin

[PATCH v12 3/6] tracing: Add snapshot refcount

2024-01-23 Thread Vincent Donnefort

When a ring-buffer is memory mapped by user-space, no trace or
ring-buffer swap is possible. This means the snapshot feature is
mutually exclusive with the memory mapping. Having a refcount on
snapshot users will help to know if a mapping is possible or not.

Signed-off-by: Vincent Donnefort 

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 46dbe22121e9..ac59321a8d95 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1300,6 +1300,52 @@ static void free_snapshot(struct trace_array *tr)
tr->allocated_snapshot = false;
 }
 
+static int tracing_arm_snapshot_locked(struct trace_array *tr)
+{
+   int ret;
+
+   lockdep_assert_held(&trace_types_lock);
+
+   if (tr->snapshot == UINT_MAX)
+   return -EBUSY;
+
+   ret = tracing_alloc_snapshot_instance(tr);
+   if (ret)
+   return ret;
+
+   tr->snapshot++;
+
+   return 0;
+}
+
+static void tracing_disarm_snapshot_locked(struct trace_array *tr)
+{
+   lockdep_assert_held(&trace_types_lock);
+
+   if (WARN_ON(!tr->snapshot))
+   return;
+
+   tr->snapshot--;
+}
+
+int tracing_arm_snapshot(struct trace_array *tr)
+{
+   int ret;
+
+   mutex_lock(&trace_types_lock);
+   ret = tracing_arm_snapshot_locked(tr);
+   mutex_unlock(&trace_types_lock);
+
+   return ret;
+}
+
+void tracing_disarm_snapshot(struct trace_array *tr)
+{
+   mutex_lock(&trace_types_lock);
+   tracing_disarm_snapshot_locked(tr);
+   mutex_unlock(&trace_types_lock);
+}
+
 /**
  * tracing_alloc_snapshot - allocate snapshot buffer.
  *
@@ -1373,10 +1419,6 @@ int tracing_snapshot_cond_enable(struct trace_array *tr, 
void *cond_data,
 
mutex_lock(&trace_types_lock);
 
-   ret = tracing_alloc_snapshot_instance(tr);
-   if (ret)
-   goto fail_unlock;
-
if (tr->current_trace->use_max_tr) {
ret = -EBUSY;
goto fail_unlock;
@@ -1395,6 +1437,10 @@ int tracing_snapshot_cond_enable(struct trace_array *tr, 
void *cond_data,
goto fail_unlock;
}
 
+   ret = tracing_arm_snapshot_locked(tr);
+   if (ret)
+   goto fail_unlock;
+
local_irq_disable();
arch_spin_lock(&tr->max_lock);
tr->cond_snapshot = cond_snapshot;
@@ -1439,6 +1485,8 @@ int tracing_snapshot_cond_disable(struct trace_array *tr)
arch_spin_unlock(&tr->max_lock);
local_irq_enable();
 
+   tracing_disarm_snapshot(tr);
+
return ret;
 }
 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
@@ -6579,11 +6627,12 @@ int tracing_set_tracer(struct trace_array *tr, const 
char *buf)
 */
synchronize_rcu();
free_snapshot(tr);
+   tracing_disarm_snapshot_locked(tr);
}
 
-   if (t->use_max_tr && !tr->allocated_snapshot) {
-   ret = tracing_alloc_snapshot_instance(tr);
-   if (ret < 0)
+   if (t->use_max_tr) {
+   ret = tracing_arm_snapshot_locked(tr);
+   if (ret)
goto out;
}
 #else
@@ -6592,8 +6641,11 @@ int tracing_set_tracer(struct trace_array *tr, const 
char *buf)
 
if (t->init) {
ret = tracer_init(t, tr);
-   if (ret)
+   if (ret) {
+   if (t->use_max_tr)
+   tracing_disarm_snapshot_locked(tr);
goto out;
+   }
}
 
tr->current_trace = t;
@@ -7695,10 +7747,11 @@ tracing_snapshot_write(struct file *filp, const char 
__user *ubuf, size_t cnt,
if (tr->allocated_snapshot)
ret = resize_buffer_duplicate_size(&tr->max_buffer,
&tr->array_buffer, iter->cpu_file);
-   else
-   ret = tracing_alloc_snapshot_instance(tr);
-   if (ret < 0)
+
+   ret = tracing_arm_snapshot_locked(tr);
+   if (ret)
break;
+
/* Now, we're going to swap */
if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
local_irq_disable();
@@ -7708,6 +7761,7 @@ tracing_snapshot_write(struct file *filp, const char 
__user *ubuf, size_t cnt,
smp_call_function_single(iter->cpu_file, 
tracing_swap_cpu_buffer,
 (void *)tr, 1);
}
+   tracing_disarm_snapshot_locked(tr);
break;
default:
if (tr->allocated_snapshot) {
@@ -8832,8 +8886,13 @@ ftrace_trace_snapshot_callback(struct trace_array *tr, 
struct ftrace_hash *hash,
 
ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
 
-   if (glob[0] == '!')
-   return unregister_ftrace_function_probe_func(glob+1, tr, ops);
+   if (glob[0] == '!') {
+   ret = unregister_ftrace_fu

[PATCH v12 4/6] tracing: Allow user-space mapping of the ring-buffer

2024-01-23 Thread Vincent Donnefort

Currently, user-space extracts data from the ring-buffer via splice,
which is handy for storage or network sharing. However, due to splice
limitations, it is imposible to do real-time analysis without a copy.

A solution for that problem is to let the user-space map the ring-buffer
directly.

The mapping is exposed via the per-CPU file trace_pipe_raw. The first
element of the mapping is the meta-page. It is followed by each
subbuffer constituting the ring-buffer, ordered by their unique page ID:

  * Meta-page -- include/uapi/linux/trace_mmap.h for a description
  * Subbuf ID 0
  * Subbuf ID 1
 ...

It is therefore easy to translate a subbuf ID into an offset in the
mapping:

  reader_id = meta->reader->id;
  reader_offset = meta->meta_page_size + reader_id * meta->subbuf_size;

When new data is available, the mapper must call a newly introduced ioctl:
TRACE_MMAP_IOCTL_GET_READER. This will update the Meta-page reader ID to
point to the next reader containing unread data.

Signed-off-by: Vincent Donnefort 

diff --git a/include/uapi/linux/trace_mmap.h b/include/uapi/linux/trace_mmap.h
index 5468afc94be7..09c4ae6d6e71 100644
--- a/include/uapi/linux/trace_mmap.h
+++ b/include/uapi/linux/trace_mmap.h
@@ -41,4 +41,6 @@ struct trace_buffer_meta {
__u64   subbufs_lost;
 };
 
+#define TRACE_MMAP_IOCTL_GET_READER_IO('T', 0x1)
+
 #endif /* _TRACE_MMAP_H_ */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ac59321a8d95..eb9e829ecc12 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1175,6 +1175,12 @@ static void tracing_snapshot_instance_cond(struct 
trace_array *tr,
return;
}
 
+   if (tr->mapped) {
+   trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
+   trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
+   return;
+   }
+
local_irq_save(flags);
update_max_tr(tr, current, smp_processor_id(), cond_data);
local_irq_restore(flags);
@@ -1309,6 +1315,9 @@ static int tracing_arm_snapshot_locked(struct trace_array 
*tr)
if (tr->snapshot == UINT_MAX)
return -EBUSY;
 
+   if (tr->mapped)
+   return -EBUSY;
+
ret = tracing_alloc_snapshot_instance(tr);
if (ret)
return ret;
@@ -6520,7 +6529,7 @@ static void tracing_set_nop(struct trace_array *tr)
 {
if (tr->current_trace == &nop_trace)
return;
-   
+
tr->current_trace->enabled--;
 
if (tr->current_trace->reset)
@@ -8637,15 +8646,31 @@ tracing_buffers_splice_read(struct file *file, loff_t 
*ppos,
return ret;
 }
 
-/* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, 
unsigned long arg)
 {
struct ftrace_buffer_info *info = file->private_data;
struct trace_iterator *iter = &info->iter;
+   int err;
 
-   if (cmd)
-   return -ENOIOCTLCMD;
+   if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
+   if (!(file->f_flags & O_NONBLOCK)) {
+   err = ring_buffer_wait(iter->array_buffer->buffer,
+  iter->cpu_file,
+  iter->tr->buffer_percent);
+   if (err)
+   return err;
+   }
+
+   return ring_buffer_map_get_reader(iter->array_buffer->buffer,
+ iter->cpu_file);
+   } else if (cmd) {
+   return -ENOTTY;
+   }
 
+   /*
+* An ioctl call with cmd 0 to the ring buffer file will wake up all
+* waiters
+*/
mutex_lock(&trace_types_lock);
 
iter->wait_index++;
@@ -8658,6 +8683,90 @@ static long tracing_buffers_ioctl(struct file *file, 
unsigned int cmd, unsigned
return 0;
 }
 
+static vm_fault_t tracing_buffers_mmap_fault(struct vm_fault *vmf)
+{
+   struct ftrace_buffer_info *info = vmf->vma->vm_file->private_data;
+   struct trace_iterator *iter = &info->iter;
+   vm_fault_t ret = VM_FAULT_SIGBUS;
+   struct page *page;
+
+   page = ring_buffer_map_fault(iter->array_buffer->buffer, iter->cpu_file,
+vmf->pgoff);
+   if (!page)
+   return ret;
+
+   get_page(page);
+   vmf->page = page;
+   vmf->page->mapping = vmf->vma->vm_file->f_mapping;
+   vmf->page->index = vmf->pgoff;
+
+   return 0;
+}
+
+static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
+{
+   struct ftrace_buffer_info *info = vma->vm_file->private_data;
+   struct trace_iterator *iter = &info->iter;
+
+   ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file);
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+   mutex_lock(&trace_types_lock);
+   if (!WARN_ON(!iter->tr->mapped))
+   ite

[PATCH v12 5/6] Documentation: tracing: Add ring-buffer mapping

2024-01-23 Thread Vincent Donnefort

It is now possible to mmap() a ring-buffer to stream its content. Add
some documentation and a code example.

Signed-off-by: Vincent Donnefort 

diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst
index 5092d6c13af5..0b300901fd75 100644
--- a/Documentation/trace/index.rst
+++ b/Documentation/trace/index.rst
@@ -29,6 +29,7 @@ Linux Tracing Technologies
timerlat-tracer
intel_th
ring-buffer-design
+   ring-buffer-map
stm
sys-t
coresight/index
diff --git a/Documentation/trace/ring-buffer-map.rst 
b/Documentation/trace/ring-buffer-map.rst
new file mode 100644
index ..7d7513180893
--- /dev/null
+++ b/Documentation/trace/ring-buffer-map.rst
@@ -0,0 +1,106 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==
+Tracefs ring-buffer memory mapping
+==
+
+:Author: Vincent Donnefort 
+
+Overview
+
+Tracefs ring-buffer memory map provides an efficient method to stream data
+as no memory copy is necessary. The application mapping the ring-buffer becomes
+then a consumer for that ring-buffer, in a similar fashion to trace_pipe.
+
+Memory mapping setup
+
+The mapping works with a mmap() of the trace_pipe_raw interface.
+
+The first system page of the mapping contains ring-buffer statistics and
+description. It is referred as the meta-page. One of the most important field 
of
+the meta-page is the reader. It contains the subbuf ID which can be safely read
+by the mapper (see ring-buffer-design.rst).
+
+The meta-page is followed by all the subbuf, ordered by ascendant ID. It is
+therefore effortless to know where the reader starts in the mapping:
+
+.. code-block:: c
+
+reader_id = meta->reader->id;
+reader_offset = meta->meta_page_size + reader_id * meta->subbuf_size;
+
+When the application is done with the current reader, it can get a new one 
using
+the trace_pipe_raw ioctl() TRACE_MMAP_IOCTL_GET_READER. This ioctl also updates
+the meta-page fields.
+
+Limitations
+===
+When a mapping is in place on a Tracefs ring-buffer, it is not possible to
+either resize it (either by increasing the entire size of the ring-buffer or
+each subbuf). It is also not possible to use snapshot or splice.
+
+Concurrent readers (either another application mapping that ring-buffer or the
+kernel with trace_pipe) are allowed but not recommended. They will compete for
+the ring-buffer and the output is unpredictable.
+
+Example
+===
+
+.. code-block:: c
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+
+#define TRACE_PIPE_RAW 
"/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw"
+
+int main(void)
+{
+int page_size = getpagesize(), fd, reader_id;
+unsigned long meta_len, data_len;
+struct trace_buffer_meta *meta;
+void *map, *reader, *data;
+
+fd = open(TRACE_PIPE_RAW, O_RDONLY | O_NONBLOCK);
+if (fd < 0)
+exit(EXIT_FAILURE);
+
+map = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
+if (map == MAP_FAILED)
+exit(EXIT_FAILURE);
+
+meta = (struct trace_buffer_meta *)map;
+meta_len = meta->meta_page_size;
+
+printf("entries:%llu\n", meta->entries);
+printf("overrun:%llu\n", meta->overrun);
+printf("read:   %llu\n", meta->read);
+printf("subbufs_touched:%llu\n", meta->subbufs_touched);
+printf("subbufs_lost:   %llu\n", meta->subbufs_lost);
+printf("nr_subbufs: %u\n", meta->nr_subbufs);
+
+data_len = meta->subbuf_size * meta->nr_subbufs;
+data = mmap(NULL, data_len, PROT_READ, MAP_SHARED, fd, 
meta_len);
+if (data == MAP_FAILED)
+exit(EXIT_FAILURE);
+
+if (ioctl(fd, TRACE_MMAP_IOCTL_GET_READER) < 0)
+exit(EXIT_FAILURE);
+
+reader_id = meta->reader.id;
+reader = data + meta->subbuf_size * reader_id;
+
+printf("Current reader address: %p\n", reader);
+
+munmap(data, data_len);
+munmap(meta, meta_len);
+close (fd);
+
+return 0;
+}
-- 
2.43.0.429.g432eaa2c6b-goog

Re: [PATCH v2 3/5] remoteproc: imx_rproc: Convert to dev_pm_domain_attach|detach_list()

2024-01-23 Thread Ulf Hansson

On Tue, 23 Jan 2024 at 02:27, Mathieu Poirier
 wrote:
>
> On Mon, Jan 22, 2024 at 01:02:08PM -0700, Mathieu Poirier wrote:
> > On Mon, 22 Jan 2024 at 10:51, Iuliana Prodan  wrote:
> > >
> > > On 1/5/2024 6:01 PM, Ulf Hansson wrote:
> > > > Let's avoid the boilerplate code to manage the multiple PM domain case, 
> > > > by
> > > > converting into using dev_pm_domain_attach|detach_list().
> > > >
> > > > Cc: Mathieu Poirier 
> > > > Cc: Bjorn Andersson 
> > > > Cc: Shawn Guo 
> > > > Cc: Sascha Hauer 
> > > > Cc: Iuliana Prodan 
> > > > Cc: Daniel Baluta 
> > > > Cc: 
> > > > Signed-off-by: Ulf Hansson 
> > > > ---
> > > >
> > > > Changes in v2:
> > > >   - None.
> > > >
> > > > Iuliana/Daniel I am ccing you to request help with test/review of this 
> > > > change.
> > > > Note that, you will need patch 1/5 in the series too, to be able to 
> > > > test this.
> > > >
> > > > Kind regards
> > > > Ulf Hansson
> > >
> > > Tested-by: Iuliana Prodan 
> > > Reviewed-by: Iuliana Prodan 
> > >
> >
> > Thanks for the leg-work on this.  I'll pick this up in rc1 later this week.
>
> Looking at the other files in this set, Ulf of perhaps Bjorn should take this
> set.

Yes, all patches in the series depend on the new helper function that
is introduced in patch 1.

I can certainly take the whole series via my pmdomain tree, if that's
fine by everyone.

Note that, there is also another series for genpd and qcom drivers
that might make it for v6.9. It may be best to keep all these things
together to avoid conflicts.

Let's see what Greg/Rafael thinks about patch 1 first though.

>
> for:
>
> drivers/remoteproc/imx_rproc.c
> drivers/remoteproc/imx_dsp_rproc.c
>
> Reviewed-by: Mathieu Poirier 

Thanks Mathieu and Iuliana!

[...]

Kind regards
Uffe

[1]
https://lore.kernel.org/all/20240122-gdsc-hwctrl-v4-0-9061e8a7a...@linaro.org/

Re: [PATCH v4 6/7] Documentation: tracing: add new type '%pd' and '%pD' for kprobe

2024-01-23 Thread Google

On Tue, 23 Jan 2024 17:21:38 +0800
Ye Bin  wrote:

> Similar to printk() '%pd' is for fetch dentry's name from struct dentry's
> pointer, and '%pD' is for fetch file's name from struct file's pointer.
> 
> Signed-off-by: Ye Bin 
> ---
>  Documentation/trace/kprobetrace.rst | 6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/trace/kprobetrace.rst 
> b/Documentation/trace/kprobetrace.rst
> index bf9cecb69fc9..a1d12d65a8dc 100644
> --- a/Documentation/trace/kprobetrace.rst
> +++ b/Documentation/trace/kprobetrace.rst
> @@ -58,7 +58,8 @@ Synopsis of kprobe_events
>NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
>FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
> (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
> -   (x8/x16/x32/x64), "char", "string", "ustring", "symbol", 
> "symstr"
> +   (x8/x16/x32/x64), VFS layer common type(%pd/%pD) for print
> +  file name, "char", "string", "ustring", "symbol", "symstr"

Could you remove the "for print file name" here?  Since this is not the place
where such precise information describes. I think following hunk is enough to
explain it.

Thank you,

>and bitfield are supported.
>  
>(\*1) only for the probe on function entry (offs == 0). Note, this 
> argument access
> @@ -113,6 +114,9 @@ With 'symstr' type, you can filter the event with 
> wildcard pattern of the
>  symbols, and you don't need to solve symbol name by yourself.
>  For $comm, the default type is "string"; any other type is invalid.
>  
> +VFS layer common type(%pd/%pD) is a special type, which fetches dentry's or
> +file's name from struct dentry's pointer or struct file's pointer.
> +
>  .. _user_mem_access:
>  
>  User Memory Access
> -- 
> 2.31.1
> 
> 


-- 
Masami Hiramatsu (Google)

Re: [PATCH v4 2/7] tracing/probes: add traceprobe_expand_dentry_args() helper

2024-01-23 Thread Google

Hi Ye,

On Tue, 23 Jan 2024 17:21:34 +0800
Ye Bin  wrote:

> Add traceprobe_expand_dentry_args() to expand dentry args. this API is
> prepare to support "%pd" print format for kprobe.
> 
> Signed-off-by: Ye Bin 
> ---
>  kernel/trace/trace_probe.c | 36 
>  kernel/trace/trace_probe.h |  2 ++
>  2 files changed, 38 insertions(+)
> 
> diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
> index 4dc74d73fc1d..cc8bd7ea5341 100644
> --- a/kernel/trace/trace_probe.c
> +++ b/kernel/trace/trace_probe.c
> @@ -1565,6 +1565,42 @@ const char **traceprobe_expand_meta_args(int argc, 
> const char *argv[],
>   return ERR_PTR(ret);
>  }
>  
> +int traceprobe_expand_dentry_args(int argc, const char *argv[], char *buf,
> +   int bufsize)
> +{
> + int i, used, ret;
> +
> + used = 0;
> + for (i = 0; i < argc; i++) {
> + size_t idx;
> +
> + if (str_has_suffix(argv[i], ":%pd", &idx)) {

Instead of using this, I recommend to use `glob_match("*:%pd", argv[i])`
so that you can simply expand the pattern as `glob_match("*:%p[dD]",...)`
(glob_match means wildcard match like shell does)

Thank you,

> + char *tmp = kstrdup(argv[i], GFP_KERNEL);
> + char *equal;
> +
> + if (!tmp)
> + return -ENOMEM;
> +
> + equal = strchr(tmp, '=');
> + if (equal)
> + *equal = '\0';
> + tmp[idx] = '\0';
> + ret = snprintf(buf + used, bufsize - used,
nb> +  "%s%s+0x0(+0x%zx(%s)):string",
> +equal ? tmp : "", equal ? "=" : "",
> +offsetof(struct dentry, d_name.name),
> +equal ? equal + 1 : tmp);
> + kfree(tmp);
> + if (ret >= bufsize - used)
> + return -ENOMEM;
> + argv[i] = buf + used;
> + used += ret + 1;
> + }
> + }
> +
> + return 0;
> +}
> +
>  void traceprobe_finish_parse(struct traceprobe_parse_context *ctx)
>  {
>   clear_btf_context(ctx);
> diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
> index 850d9ecb6765..553371a4e0b1 100644
> --- a/kernel/trace/trace_probe.h
> +++ b/kernel/trace/trace_probe.h
> @@ -402,6 +402,8 @@ extern int traceprobe_parse_probe_arg(struct trace_probe 
> *tp, int i,
>  const char **traceprobe_expand_meta_args(int argc, const char *argv[],
>int *new_argc, char *buf, int bufsize,
>struct traceprobe_parse_context *ctx);
> +extern int traceprobe_expand_dentry_args(int argc, const char *argv[], char 
> *buf,
> +  int bufsize);
>  
>  extern int traceprobe_update_arg(struct probe_arg *arg);
>  extern void traceprobe_free_probe_arg(struct probe_arg *arg);
> -- 
> 2.31.1
> 
> 


-- 
Masami Hiramatsu (Google)

Re: [PATCH v4 3/7] tracing/probes: support '%pd' type for print struct dentry's name

2024-01-23 Thread Google

On Tue, 23 Jan 2024 17:21:35 +0800
Ye Bin  wrote:

> Similar to '%pd' for printk, use '%pd' for print struct dentry's name.
> 
> Signed-off-by: Ye Bin 
> ---
>  kernel/trace/trace_kprobe.c | 6 ++
>  kernel/trace/trace_probe.h  | 1 +
>  2 files changed, 7 insertions(+)
> 
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index c4c6e0e0068b..00b74530fbad 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -779,6 +779,7 @@ static int __trace_kprobe_create(int argc, const char 
> *argv[])
>   char buf[MAX_EVENT_NAME_LEN];
>   char gbuf[MAX_EVENT_NAME_LEN];
>   char abuf[MAX_BTF_ARGS_LEN];
> + char dbuf[MAX_DENTRY_ARGS_LEN];

Hmm, no, I don't like to expand stack anymore. Please allocate it
from heap.

>   struct traceprobe_parse_context ctx = { .flags = TPARG_FL_KERNEL };
>  
>   switch (argv[0][0]) {
> @@ -930,6 +931,11 @@ static int __trace_kprobe_create(int argc, const char 
> *argv[])
>   argv = new_argv;
>   }
>  
> + ret = traceprobe_expand_dentry_args(argc, argv, dbuf,
> + MAX_DENTRY_ARGS_LEN);
> + if (ret)
> + goto out;

And calling this here will not cover the trace_fprobe. 

Could you call this from traceprobe_expand_meta_args() instead of
calling it directly from trace_kprobe? Then it can be used from
fprobe_event too.

Thank you,

> +
>   /* setup a probe */
>   tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,
>   argc, is_return);
> diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
> index 553371a4e0b1..d9c053824975 100644
> --- a/kernel/trace/trace_probe.h
> +++ b/kernel/trace/trace_probe.h
> @@ -34,6 +34,7 @@
>  #define MAX_ARRAY_LEN64
>  #define MAX_ARG_NAME_LEN 32
>  #define MAX_BTF_ARGS_LEN 128
> +#define MAX_DENTRY_ARGS_LEN  256

Why do you think 

>  #define MAX_STRING_SIZE  PATH_MAX
>  #define MAX_ARG_BUF_LEN  (MAX_TRACE_ARGS * MAX_ARG_NAME_LEN)
>  
> -- 
> 2.31.1
> 
> 


-- 
Masami Hiramatsu (Google)

[PATCH 0/2] tracing/probes: Fix probe event argument parser

2024-01-23 Thread Masami Hiramatsu (Google)

These fix two bugs in the trace probe event argument parser, which I
found while I'm cleaning up the code.

One is just adding an error message to message log buffer (but it is
important to check the reason). Another is setting the size of argument
entry correctly with argument data updated by BTF.

Thank you,

---

Masami Hiramatsu (Google) (2):
  tracing/probes: Fix to show a parse error for bad type for $comm
  tracing/probes: Fix to set arg size and fmt after setting type from BTF


 kernel/trace/trace_probe.c |   32 ++--
 kernel/trace/trace_probe.h |3 ++-
 2 files changed, 20 insertions(+), 15 deletions(-)

--
Masami Hiramatsu (Google)

[PATCH 1/2] tracing/probes: Fix to show a parse error for bad type for $comm

2024-01-23 Thread Masami Hiramatsu (Google)

From: Masami Hiramatsu (Google) 

Fix to show a parse error for bad type (non-string) for $comm/$COMM and
immediate-string. With this fix, error_log file shows appropriate error
message as below.

 /sys/kernel/tracing # echo 'p vfs_read $comm:u32' >> kprobe_events
sh: write error: Invalid argument
 /sys/kernel/tracing # echo 'p vfs_read \"hoge":u32' >> kprobe_events
sh: write error: Invalid argument
 /sys/kernel/tracing # cat error_log

[   30.144183] trace_kprobe: error: $comm and immediate-string only accepts 
string type
  Command: p vfs_read $comm:u32
^
[   62.618500] trace_kprobe: error: $comm and immediate-string only accepts 
string type
  Command: p vfs_read \"hoge":u32
  ^
Fixes: 3dd1f7f24f8c ("tracing: probeevent: Fix to make the type of $comm 
string")
Cc: sta...@vger.kernel.org
Signed-off-by: Masami Hiramatsu (Google) 
---
 kernel/trace/trace_probe.c |7 +--
 kernel/trace/trace_probe.h |3 ++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 4dc74d73fc1d..c6da5923e5b9 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -1159,9 +1159,12 @@ static int traceprobe_parse_probe_arg_body(const char 
*argv, ssize_t *size,
if (!(ctx->flags & TPARG_FL_TEVENT) &&
(strcmp(arg, "$comm") == 0 || strcmp(arg, "$COMM") == 0 ||
 strncmp(arg, "\\\"", 2) == 0)) {
-   /* The type of $comm must be "string", and not an array. */
-   if (parg->count || (t && strcmp(t, "string")))
+   /* The type of $comm must be "string", and not an array type. */
+   if (parg->count || (t && strcmp(t, "string"))) {
+   trace_probe_log_err(ctx->offset + (t ? (t - arg) : 0),
+   NEED_STRING_TYPE);
goto out;
+   }
parg->type = find_fetch_type("string", ctx->flags);
} else
parg->type = find_fetch_type(t, ctx->flags);
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 850d9ecb6765..c1877d018269 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -515,7 +515,8 @@ extern int traceprobe_define_arg_fields(struct 
trace_event_call *event_call,
C(BAD_HYPHEN,   "Failed to parse single hyphen. Forgot '>'?"),  
\
C(NO_BTF_FIELD, "This field is not found."),\
C(BAD_BTF_TID,  "Failed to get BTF type info."),\
-   C(BAD_TYPE4STR, "This type does not fit for string."),
+   C(BAD_TYPE4STR, "This type does not fit for string."),\
+   C(NEED_STRING_TYPE, "$comm and immediate-string only accepts string 
type"),
 
 #undef C
 #define C(a, b)TP_ERR_##a

[PATCH 2/2] tracing/probes: Fix to set arg size and fmt after setting type from BTF

2024-01-23 Thread Masami Hiramatsu (Google)

From: Masami Hiramatsu (Google) 

Since the BTF type setting updates probe_arg::type, the type size
calculation and setting print-fmt should be done after that.
Without this fix, the argument size and print-fmt can be wrong.

Fixes: b576e09701c7 ("tracing/probes: Support function parameters if BTF is 
available")
Cc: sta...@vger.kernel.org
Signed-off-by: Masami Hiramatsu (Google) 
---
 kernel/trace/trace_probe.c |   25 +
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index c6da5923e5b9..34289f9c6707 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -1172,18 +1172,6 @@ static int traceprobe_parse_probe_arg_body(const char 
*argv, ssize_t *size,
trace_probe_log_err(ctx->offset + (t ? (t - arg) : 0), 
BAD_TYPE);
goto out;
}
-   parg->offset = *size;
-   *size += parg->type->size * (parg->count ?: 1);
-
-   ret = -ENOMEM;
-   if (parg->count) {
-   len = strlen(parg->type->fmttype) + 6;
-   parg->fmt = kmalloc(len, GFP_KERNEL);
-   if (!parg->fmt)
-   goto out;
-   snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype,
-parg->count);
-   }
 
code = tmp = kcalloc(FETCH_INSN_MAX, sizeof(*code), GFP_KERNEL);
if (!code)
@@ -1207,6 +1195,19 @@ static int traceprobe_parse_probe_arg_body(const char 
*argv, ssize_t *size,
goto fail;
}
}
+   parg->offset = *size;
+   *size += parg->type->size * (parg->count ?: 1);
+
+   if (parg->count) {
+   len = strlen(parg->type->fmttype) + 6;
+   parg->fmt = kmalloc(len, GFP_KERNEL);
+   if (!parg->fmt) {
+   ret = -ENOMEM;
+   goto out;
+   }
+   snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype,
+parg->count);
+   }
 
ret = -EINVAL;
/* Store operation */

Re: [PATCH net-next v2] vsock/test: add '--peer-port' input argument

2024-01-23 Thread Michael S. Tsirkin

On Tue, Jan 23, 2024 at 10:27:50AM +0300, Arseniy Krasnov wrote:
> Implement port for given CID as input argument instead of using
> hardcoded value '1234'. This allows to run different test instances
> on a single CID. Port argument is not required parameter and if it is
> not set, then default value will be '1234' - thus we preserve previous
> behaviour.
> 
> Signed-off-by: Arseniy Krasnov 

Acked-by: Michael S. Tsirkin 

> ---
>  Changelog:
>  v1 -> v2:
>   * Reword usage message.
>   * Add commas after last field in 'opts' declaration.
>   * 'RFC' -> 'net-next'.
> 
>  tools/testing/vsock/util.c|  17 +++-
>  tools/testing/vsock/util.h|   4 +
>  tools/testing/vsock/vsock_diag_test.c |  21 +++--
>  tools/testing/vsock/vsock_test.c  | 102 +-
>  tools/testing/vsock/vsock_test_zerocopy.c |  12 +--
>  tools/testing/vsock/vsock_uring_test.c|  17 +++-
>  6 files changed, 115 insertions(+), 58 deletions(-)
> 
> diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
> index ae2b33c21c45..554b290fefdc 100644
> --- a/tools/testing/vsock/util.c
> +++ b/tools/testing/vsock/util.c
> @@ -33,8 +33,7 @@ void init_signals(void)
>   signal(SIGPIPE, SIG_IGN);
>  }
>  
> -/* Parse a CID in string representation */
> -unsigned int parse_cid(const char *str)
> +static unsigned int parse_uint(const char *str, const char *err_str)
>  {
>   char *endptr = NULL;
>   unsigned long n;
> @@ -42,12 +41,24 @@ unsigned int parse_cid(const char *str)
>   errno = 0;
>   n = strtoul(str, &endptr, 10);
>   if (errno || *endptr != '\0') {
> - fprintf(stderr, "malformed CID \"%s\"\n", str);
> + fprintf(stderr, "malformed %s \"%s\"\n", err_str, str);
>   exit(EXIT_FAILURE);
>   }
>   return n;
>  }
>  
> +/* Parse a CID in string representation */
> +unsigned int parse_cid(const char *str)
> +{
> + return parse_uint(str, "CID");
> +}
> +
> +/* Parse a port in string representation */
> +unsigned int parse_port(const char *str)
> +{
> + return parse_uint(str, "port");
> +}
> +
>  /* Wait for the remote to close the connection */
>  void vsock_wait_remote_close(int fd)
>  {
> diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
> index 03c88d0cb861..e95e62485959 100644
> --- a/tools/testing/vsock/util.h
> +++ b/tools/testing/vsock/util.h
> @@ -12,10 +12,13 @@ enum test_mode {
>   TEST_MODE_SERVER
>  };
>  
> +#define DEFAULT_PEER_PORT1234
> +
>  /* Test runner options */
>  struct test_opts {
>   enum test_mode mode;
>   unsigned int peer_cid;
> + unsigned int peer_port;
>  };
>  
>  /* A test case definition.  Test functions must print failures to stderr and
> @@ -35,6 +38,7 @@ struct test_case {
>  
>  void init_signals(void);
>  unsigned int parse_cid(const char *str);
> +unsigned int parse_port(const char *str);
>  int vsock_stream_connect(unsigned int cid, unsigned int port);
>  int vsock_bind_connect(unsigned int cid, unsigned int port,
>  unsigned int bind_port, int type);
> diff --git a/tools/testing/vsock/vsock_diag_test.c 
> b/tools/testing/vsock/vsock_diag_test.c
> index fa927ad16f8a..9d61b1f1c4c3 100644
> --- a/tools/testing/vsock/vsock_diag_test.c
> +++ b/tools/testing/vsock/vsock_diag_test.c
> @@ -342,7 +342,7 @@ static void test_listen_socket_server(const struct 
> test_opts *opts)
>   } addr = {
>   .svm = {
>   .svm_family = AF_VSOCK,
> - .svm_port = 1234,
> + .svm_port = opts->peer_port,
>   .svm_cid = VMADDR_CID_ANY,
>   },
>   };
> @@ -378,7 +378,7 @@ static void test_connect_client(const struct test_opts 
> *opts)
>   LIST_HEAD(sockets);
>   struct vsock_stat *st;
>  
> - fd = vsock_stream_connect(opts->peer_cid, 1234);
> + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
>   if (fd < 0) {
>   perror("connect");
>   exit(EXIT_FAILURE);
> @@ -403,7 +403,7 @@ static void test_connect_server(const struct test_opts 
> *opts)
>   LIST_HEAD(sockets);
>   int client_fd;
>  
> - client_fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
> + client_fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
>   if (client_fd < 0) {
>   perror("accept");
>   exit(EXIT_FAILURE);
> @@ -461,6 +461,11 @@ static const struct option longopts[] = {
>   .has_arg = required_argument,
>   .val = 'p',
>   },
> + {
> + .name = "peer-port",
> + .has_arg = required_argument,
> + .val = 'q',
> + },
>   {
>   .name = "list",
>   .has_arg = no_argument,
> @@ -481,7 +486,7 @@ static const struct option longopts[] = {
>  
>  static void usage(void)
>  {
> - fprintf(stderr, "Usage: vsock_diag_test [--help] 
> [--control-host=] -

Re: [PATCH v12 2/6] ring-buffer: Introducing ring-buffer mapping functions

2024-01-23 Thread Steven Rostedt

On Tue, 23 Jan 2024 11:07:53 +
Vincent Donnefort  wrote:

> index ..5468afc94be7
> --- /dev/null
> +++ b/include/uapi/linux/trace_mmap.h
> @@ -0,0 +1,44 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +#ifndef _TRACE_MMAP_H_
> +#define _TRACE_MMAP_H_
> +
> +#include 
> +
> +/**
> + * struct trace_buffer_meta - Ring-buffer Meta-page description
> + * @meta_page_size:  Size of this meta-page.
> + * @meta_struct_len: Size of this structure.
> + * @subbuf_size: Size of each subbuf, including the header.
> + * @nr_subbufs:  Number of subbfs in the ring-buffer.
> + * @reader.lost_events:  Number of events lost at the time of the reader 
> swap.
> + * @reader.id:   subbuf ID of the current reader. From 0 to 
> @nr_subbufs - 1
> + * @reader.read: Number of bytes read on the reader subbuf.
> + * @entries: Number of entries in the ring-buffer.
> + * @overrun: Number of entries lost in the ring-buffer.
> + * @read:Number of entries that have been read.


> + * @subbufs_touched: Number of subbufs that have been filled.
> + * @subbufs_lost:Number of subbufs lost to overrun.
> + * @subbufs_read:Number of subbufs that have been read.

Do we actually need the above 3 fields?

What's the use case for them? I don't want to expose internals in the API
unless they are needed.

-- Steve

[PATCH 4/4] hwspinlock: omap: Use index to get hwspinlock pointer

2024-01-23 Thread Andrew Davis

For loops with multiple initializers and increments are hard to read
and reason about, simplify this by using the looping index to index
into the hwspinlock array.

Signed-off-by: Andrew Davis 
---
 drivers/hwspinlock/omap_hwspinlock.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/hwspinlock/omap_hwspinlock.c 
b/drivers/hwspinlock/omap_hwspinlock.c
index 1b0a1bea2b24a..a2555c41320d8 100644
--- a/drivers/hwspinlock/omap_hwspinlock.c
+++ b/drivers/hwspinlock/omap_hwspinlock.c
@@ -75,7 +75,6 @@ static const struct hwspinlock_ops omap_hwspinlock_ops = {
 static int omap_hwspinlock_probe(struct platform_device *pdev)
 {
struct hwspinlock_device *bank;
-   struct hwspinlock *hwlock;
void __iomem *io_base;
int num_locks, i, ret;
/* Only a single hwspinlock block device is supported */
@@ -117,8 +116,8 @@ static int omap_hwspinlock_probe(struct platform_device 
*pdev)
if (!bank)
return -ENOMEM;
 
-   for (i = 0, hwlock = &bank->lock[0]; i < num_locks; i++, hwlock++)
-   hwlock->priv = io_base + LOCK_BASE_OFFSET + sizeof(u32) * i;
+   for (i = 0; i < num_locks; i++)
+   bank->lock[i].priv = io_base + LOCK_BASE_OFFSET + sizeof(u32) * 
i;
 
ret = devm_hwspin_lock_register(&pdev->dev, bank, &omap_hwspinlock_ops,
base_id, num_locks);
-- 
2.39.2

[PATCH 3/4] hwspinlock: omap: Use devm_hwspin_lock_register() helper

2024-01-23 Thread Andrew Davis

This unregister the HW spinlock on module exit, allowing us to
remove the remove callback. Do this here.

Signed-off-by: Andrew Davis 
---
 drivers/hwspinlock/omap_hwspinlock.c | 17 +
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/drivers/hwspinlock/omap_hwspinlock.c 
b/drivers/hwspinlock/omap_hwspinlock.c
index 2f18ea6c05e3f..1b0a1bea2b24a 100644
--- a/drivers/hwspinlock/omap_hwspinlock.c
+++ b/drivers/hwspinlock/omap_hwspinlock.c
@@ -117,12 +117,10 @@ static int omap_hwspinlock_probe(struct platform_device 
*pdev)
if (!bank)
return -ENOMEM;
 
-   platform_set_drvdata(pdev, bank);
-
for (i = 0, hwlock = &bank->lock[0]; i < num_locks; i++, hwlock++)
hwlock->priv = io_base + LOCK_BASE_OFFSET + sizeof(u32) * i;
 
-   ret = hwspin_lock_register(bank, &pdev->dev, &omap_hwspinlock_ops,
+   ret = devm_hwspin_lock_register(&pdev->dev, bank, &omap_hwspinlock_ops,
base_id, num_locks);
if (ret)
return ret;
@@ -133,18 +131,6 @@ static int omap_hwspinlock_probe(struct platform_device 
*pdev)
return 0;
 }
 
-static void omap_hwspinlock_remove(struct platform_device *pdev)
-{
-   struct hwspinlock_device *bank = platform_get_drvdata(pdev);
-   int ret;
-
-   ret = hwspin_lock_unregister(bank);
-   if (ret) {
-   dev_err(&pdev->dev, "%s failed: %d\n", __func__, ret);
-   return;
-   }
-}
-
 static const struct of_device_id omap_hwspinlock_of_match[] = {
{ .compatible = "ti,omap4-hwspinlock", },
{ .compatible = "ti,am64-hwspinlock", },
@@ -155,7 +141,6 @@ MODULE_DEVICE_TABLE(of, omap_hwspinlock_of_match);
 
 static struct platform_driver omap_hwspinlock_driver = {
.probe  = omap_hwspinlock_probe,
-   .remove_new = omap_hwspinlock_remove,
.driver = {
.name   = "omap_hwspinlock",
.of_match_table = omap_hwspinlock_of_match,
-- 
2.39.2

[PATCH 2/4] hwspinlock: omap: Use devm_pm_runtime_enable() helper

2024-01-23 Thread Andrew Davis

This disables runtime PM on module exit, allowing us to simplify
the probe exit path and remove callbacks. Do that here.

Signed-off-by: Andrew Davis 
---
 drivers/hwspinlock/omap_hwspinlock.c | 26 --
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/drivers/hwspinlock/omap_hwspinlock.c 
b/drivers/hwspinlock/omap_hwspinlock.c
index cca55143d24d4..2f18ea6c05e3f 100644
--- a/drivers/hwspinlock/omap_hwspinlock.c
+++ b/drivers/hwspinlock/omap_hwspinlock.c
@@ -89,10 +89,10 @@ static int omap_hwspinlock_probe(struct platform_device 
*pdev)
 * make sure the module is enabled and clocked before reading
 * the module SYSSTATUS register
 */
-   pm_runtime_enable(&pdev->dev);
+   devm_pm_runtime_enable(&pdev->dev);
ret = pm_runtime_resume_and_get(&pdev->dev);
if (ret < 0)
-   goto runtime_err;
+   return ret;
 
/* Determine number of locks */
i = readl(io_base + SYSSTATUS_OFFSET);
@@ -104,22 +104,18 @@ static int omap_hwspinlock_probe(struct platform_device 
*pdev)
 */
ret = pm_runtime_put(&pdev->dev);
if (ret < 0)
-   goto runtime_err;
+   return ret;
 
/* one of the four lsb's must be set, and nothing else */
-   if (hweight_long(i & 0xf) != 1 || i > 8) {
-   ret = -EINVAL;
-   goto runtime_err;
-   }
+   if (hweight_long(i & 0xf) != 1 || i > 8)
+   return -EINVAL;
 
num_locks = i * 32; /* actual number of locks in this device */
 
bank = devm_kzalloc(&pdev->dev, struct_size(bank, lock, num_locks),
GFP_KERNEL);
-   if (!bank) {
-   ret = -ENOMEM;
-   goto runtime_err;
-   }
+   if (!bank)
+   return -ENOMEM;
 
platform_set_drvdata(pdev, bank);
 
@@ -129,16 +125,12 @@ static int omap_hwspinlock_probe(struct platform_device 
*pdev)
ret = hwspin_lock_register(bank, &pdev->dev, &omap_hwspinlock_ops,
base_id, num_locks);
if (ret)
-   goto runtime_err;
+   return ret;
 
dev_dbg(&pdev->dev, "Registered %d locks with HwSpinlock core\n",
num_locks);
 
return 0;
-
-runtime_err:
-   pm_runtime_disable(&pdev->dev);
-   return ret;
 }
 
 static void omap_hwspinlock_remove(struct platform_device *pdev)
@@ -151,8 +143,6 @@ static void omap_hwspinlock_remove(struct platform_device 
*pdev)
dev_err(&pdev->dev, "%s failed: %d\n", __func__, ret);
return;
}
-
-   pm_runtime_disable(&pdev->dev);
 }
 
 static const struct of_device_id omap_hwspinlock_of_match[] = {
-- 
2.39.2

[PATCH 1/4] hwspinlock: omap: Remove unneeded check for OF node

2024-01-23 Thread Andrew Davis

We do not use the OF node anymore, nor does it matter how
we got to probe, so remove the check for of_node.

Signed-off-by: Andrew Davis 
---
 drivers/hwspinlock/omap_hwspinlock.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/drivers/hwspinlock/omap_hwspinlock.c 
b/drivers/hwspinlock/omap_hwspinlock.c
index a9fd9ca45f2a8..cca55143d24d4 100644
--- a/drivers/hwspinlock/omap_hwspinlock.c
+++ b/drivers/hwspinlock/omap_hwspinlock.c
@@ -74,7 +74,6 @@ static const struct hwspinlock_ops omap_hwspinlock_ops = {
 
 static int omap_hwspinlock_probe(struct platform_device *pdev)
 {
-   struct device_node *node = pdev->dev.of_node;
struct hwspinlock_device *bank;
struct hwspinlock *hwlock;
void __iomem *io_base;
@@ -82,9 +81,6 @@ static int omap_hwspinlock_probe(struct platform_device *pdev)
/* Only a single hwspinlock block device is supported */
int base_id = 0;
 
-   if (!node)
-   return -ENODEV;
-
io_base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(io_base))
return PTR_ERR(io_base);
-- 
2.39.2

Re: [PATCH v7 04/15] x86/sgx: Implement basic EPC misc cgroup functionality

2024-01-23 Thread Haitao Huang

On Mon, 22 Jan 2024 14:25:53 -0600, Jarkko Sakkinen   
wrote:



On Mon Jan 22, 2024 at 7:20 PM EET, Haitao Huang wrote:

From: Kristen Carlson Accardi 

SGX Enclave Page Cache (EPC) memory allocations are separate from normal
RAM allocations, and are managed solely by the SGX subsystem. The
existing cgroup memory controller cannot be used to limit or account for
SGX EPC memory, which is a desirable feature in some environments.  For
example, in a Kubernates environment, a user can request certain EPC
quota for a pod but the orchestrator can not enforce the quota to limit
runtime EPC usage of the pod without an EPC cgroup controller.

Utilize the misc controller [admin-guide/cgroup-v2.rst, 5-9. Misc] to
limit and track EPC allocations per cgroup. Earlier patches have added
the "sgx_epc" resource type in the misc cgroup subsystem. Add basic
support in SGX driver as the "sgx_epc" resource provider:

- Set "capacity" of EPC by calling misc_cg_set_capacity()
- Update EPC usage counter, "current", by calling charge and uncharge
APIs for EPC allocation and deallocation, respectively.
- Setup sgx_epc resource type specific callbacks, which perform
initialization and cleanup during cgroup allocation and deallocation,
respectively.

With these changes, the misc cgroup controller enables user to set a  
hard

limit for EPC usage in the "misc.max" interface file. It reports current
usage in "misc.current", the total EPC memory available in
"misc.capacity", and the number of times EPC usage reached the max limit
in "misc.events".

For now, the EPC cgroup simply blocks additional EPC allocation in
sgx_alloc_epc_page() when the limit is reached. Reclaimable pages are
still tracked in the global active list, only reclaimed by the global
reclaimer when the total free page count is lower than a threshold.

Later patches will reorganize the tracking and reclamation code in the
global reclaimer and implement per-cgroup tracking and reclaiming.

Co-developed-by: Sean Christopherson 
Signed-off-by: Sean Christopherson 
Signed-off-by: Kristen Carlson Accardi 
Co-developed-by: Haitao Huang 
Signed-off-by: Haitao Huang 


For consistency sake I'd also add co-developed-by for Kristen. This is
at least the format suggested by kernel documentation.


---
V7:
- Use a static for root cgroup (Kai)
- Wrap epc_cg field in sgx_epc_page struct with #ifdef (Kai)
- Correct check for charge API return (Kai)
- Start initialization in SGX device driver init (Kai)
- Remove unneeded BUG_ON (Kai)
- Split  sgx_get_current_epc_cg() out of sgx_epc_cg_try_charge() (Kai)

V6:
- Split the original large patch"Limit process EPC usage with misc
cgroup controller"  and restructure it (Kai)
---
 arch/x86/Kconfig | 13 +
 arch/x86/kernel/cpu/sgx/Makefile |  1 +
 arch/x86/kernel/cpu/sgx/epc_cgroup.c | 79 
 arch/x86/kernel/cpu/sgx/epc_cgroup.h | 73 +
 arch/x86/kernel/cpu/sgx/main.c   | 52 +-
 arch/x86/kernel/cpu/sgx/sgx.h|  5 ++
 include/linux/misc_cgroup.h  |  2 +
 7 files changed, 223 insertions(+), 2 deletions(-)
 create mode 100644 arch/x86/kernel/cpu/sgx/epc_cgroup.c
 create mode 100644 arch/x86/kernel/cpu/sgx/epc_cgroup.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5edec175b9bf..10c3d1d099b2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1947,6 +1947,19 @@ config X86_SGX

  If unsure, say N.

+config CGROUP_SGX_EPC
+	bool "Miscellaneous Cgroup Controller for Enclave Page Cache (EPC)  
for Intel SGX"

+   depends on X86_SGX && CGROUP_MISC
+   help
+ Provides control over the EPC footprint of tasks in a cgroup via
+ the Miscellaneous cgroup controller.
+
+ EPC is a subset of regular memory that is usable only by SGX
+ enclaves and is very limited in quantity, e.g. less than 1%
+ of total DRAM.
+
+ Say N if unsure.
+
 config X86_USER_SHADOW_STACK
bool "X86 userspace shadow stack"
depends on AS_WRUSS
diff --git a/arch/x86/kernel/cpu/sgx/Makefile  
b/arch/x86/kernel/cpu/sgx/Makefile

index 9c1656779b2a..12901a488da7 100644
--- a/arch/x86/kernel/cpu/sgx/Makefile
+++ b/arch/x86/kernel/cpu/sgx/Makefile
@@ -4,3 +4,4 @@ obj-y += \
ioctl.o \
main.o
 obj-$(CONFIG_X86_SGX_KVM)  += virt.o
+obj-$(CONFIG_CGROUP_SGX_EPC)  += epc_cgroup.o
diff --git a/arch/x86/kernel/cpu/sgx/epc_cgroup.c  
b/arch/x86/kernel/cpu/sgx/epc_cgroup.c

new file mode 100644
index ..938695816a9e
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/epc_cgroup.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2022 Intel Corporation.
+
+#include 
+#include 
+#include "epc_cgroup.h"
+
+static struct sgx_epc_cgroup epc_cg_root;
+
+/**
+ * sgx_epc_cgroup_try_charge() - try to charge cgroup for a single EPC  
page

+ *
+ * @epc_cg:The EPC cgroup to be charged for the page.
+ * Return:
+ * * %0 - If successfully charged.
+ * * -errno -

Re: [PATCH v7 01/15] cgroup/misc: Add per resource callbacks for CSS events

2024-01-23 Thread Haitao Huang

On Mon, 22 Jan 2024 14:14:01 -0600, Jarkko Sakkinen   
wrote:



On Mon Jan 22, 2024 at 7:20 PM EET, Haitao Huang wrote:

From: Kristen Carlson Accardi 

The misc cgroup controller (subsystem) currently does not perform
resource type specific action for Cgroups Subsystem State (CSS) events:
the 'css_alloc' event when a cgroup is created and the 'css_free' event
when a cgroup is destroyed.

Define callbacks for those events and allow resource providers to
register the callbacks per resource type as needed. This will be
utilized later by the EPC misc cgroup support implemented in the SGX
driver.

Signed-off-by: Kristen Carlson Accardi 
Co-developed-by: Haitao Huang 
Signed-off-by: Haitao Huang 
---
V7:
- Make ops one per resource type and store them in array (Michal)
- Rename the ops struct to misc_res_ops, and enforce the constraints of  
required callback

functions (Jarkko)
- Moved addition of priv field to patch 4 where it was used first.  
(Jarkko)


V6:
- Create ops struct for per resource callbacks (Jarkko)
- Drop max_write callback (Dave, Michal)
- Style fixes (Kai)
---
 include/linux/misc_cgroup.h | 11 +++
 kernel/cgroup/misc.c| 60 +++--
 2 files changed, 68 insertions(+), 3 deletions(-)

diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h
index e799b1f8d05b..0806d4436208 100644
--- a/include/linux/misc_cgroup.h
+++ b/include/linux/misc_cgroup.h
@@ -27,6 +27,16 @@ struct misc_cg;

 #include 

+/**
+ * struct misc_res_ops: per resource type callback ops.
+ * @alloc: invoked for resource specific initialization when cgroup is  
allocated.
+ * @free: invoked for resource specific cleanup when cgroup is  
deallocated.

+ */
+struct misc_res_ops {
+   int (*alloc)(struct misc_cg *cg);
+   void (*free)(struct misc_cg *cg);
+};
+
 /**
  * struct misc_res: Per cgroup per misc type resource
  * @max: Maximum limit on the resource.
@@ -56,6 +66,7 @@ struct misc_cg {

 u64 misc_cg_res_total_usage(enum misc_res_type type);
 int misc_cg_set_capacity(enum misc_res_type type, u64 capacity);
+int misc_cg_set_ops(enum misc_res_type type, const struct misc_res_ops  
*ops);
 int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg,  
u64 amount);
 void misc_cg_uncharge(enum misc_res_type type, struct misc_cg *cg, u64  
amount);


diff --git a/kernel/cgroup/misc.c b/kernel/cgroup/misc.c
index 79a3717a5803..b8c32791334c 100644
--- a/kernel/cgroup/misc.c
+++ b/kernel/cgroup/misc.c
@@ -39,6 +39,9 @@ static struct misc_cg root_cg;
  */
 static u64 misc_res_capacity[MISC_CG_RES_TYPES];

+/* Resource type specific operations */
+static const struct misc_res_ops *misc_res_ops[MISC_CG_RES_TYPES];
+
 /**
  * parent_misc() - Get the parent of the passed misc cgroup.
  * @cgroup: cgroup whose parent needs to be fetched.
@@ -105,6 +108,36 @@ int misc_cg_set_capacity(enum misc_res_type type,  
u64 capacity)

 }
 EXPORT_SYMBOL_GPL(misc_cg_set_capacity);

+/**
+ * misc_cg_set_ops() - set resource specific operations.
+ * @type: Type of the misc res.
+ * @ops: Operations for the given type.
+ *
+ * Context: Any context.
+ * Return:
+ * * %0 - Successfully registered the operations.
+ * * %-EINVAL - If @type is invalid, or the operations missing any  
required callbacks.

+ */
+int misc_cg_set_ops(enum misc_res_type type, const struct misc_res_ops  
*ops)

+{
+   if (!valid_type(type))
+   return -EINVAL;
+
+   if (!ops->alloc) {
+   pr_err("%s: alloc missing\n", __func__);
+   return -EINVAL;
+   }
+
+   if (!ops->free) {
+   pr_err("%s: free missing\n", __func__);
+   return -EINVAL;
+   }
+
+   misc_res_ops[type] = ops;
+   return 0;
+}
+EXPORT_SYMBOL_GPL(misc_cg_set_ops);
+
 /**
  * misc_cg_cancel_charge() - Cancel the charge from the misc cgroup.
  * @type: Misc res type in misc cg to cancel the charge from.
@@ -383,23 +416,37 @@ static struct cftype misc_cg_files[] = {
 static struct cgroup_subsys_state *
 misc_cg_alloc(struct cgroup_subsys_state *parent_css)
 {
+   struct misc_cg *parent_cg, *cg;
enum misc_res_type i;
-   struct misc_cg *cg;
+   int ret;

if (!parent_css) {
-   cg = &root_cg;
+   parent_cg = cg = &root_cg;
} else {
cg = kzalloc(sizeof(*cg), GFP_KERNEL);
if (!cg)
return ERR_PTR(-ENOMEM);
+   parent_cg = css_misc(parent_css);
}

for (i = 0; i < MISC_CG_RES_TYPES; i++) {
WRITE_ONCE(cg->res[i].max, MAX_NUM);
atomic64_set(&cg->res[i].usage, 0);
+   if (misc_res_ops[i]) {
+   ret = misc_res_ops[i]->alloc(cg);
+   if (ret)
+   goto alloc_err;


So I'd consider pattern like this to avoid repetition:

if (ret) {
__misc_cg_free(cg);

Re: [PATCH v12 2/6] ring-buffer: Introducing ring-buffer mapping functions

2024-01-23 Thread Vincent Donnefort

On Tue, Jan 23, 2024 at 10:51:49AM -0500, Steven Rostedt wrote:
> On Tue, 23 Jan 2024 11:07:53 +
> Vincent Donnefort  wrote:
> 
> > index ..5468afc94be7
> > --- /dev/null
> > +++ b/include/uapi/linux/trace_mmap.h
> > @@ -0,0 +1,44 @@
> > +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> > +#ifndef _TRACE_MMAP_H_
> > +#define _TRACE_MMAP_H_
> > +
> > +#include 
> > +
> > +/**
> > + * struct trace_buffer_meta - Ring-buffer Meta-page description
> > + * @meta_page_size:Size of this meta-page.
> > + * @meta_struct_len:   Size of this structure.
> > + * @subbuf_size:   Size of each subbuf, including the header.
> > + * @nr_subbufs:Number of subbfs in the ring-buffer.
> > + * @reader.lost_events:Number of events lost at the time of the reader 
> > swap.
> > + * @reader.id: subbuf ID of the current reader. From 0 to 
> > @nr_subbufs - 1
> > + * @reader.read:   Number of bytes read on the reader subbuf.
> > + * @entries:   Number of entries in the ring-buffer.
> > + * @overrun:   Number of entries lost in the ring-buffer.
> > + * @read:  Number of entries that have been read.
> 
> 
> > + * @subbufs_touched:   Number of subbufs that have been filled.
> > + * @subbufs_lost:  Number of subbufs lost to overrun.
> > + * @subbufs_read:  Number of subbufs that have been read.
> 
> Do we actually need the above 3 fields?
> 
> What's the use case for them? I don't want to expose internals in the API
> unless they are needed.

subbufs_read is gone, I just forgot to remove it here :-\.

The two other ones are used for tracing with the hypervisor. That's why I
preemptively added them. 

I can remove them and add just append this struct later ... or just overload
this struct with another one, only shared between the kernel and the hypervisor?

> 
> -- Steve

Re: [PATCH v12 2/6] ring-buffer: Introducing ring-buffer mapping functions

2024-01-23 Thread Steven Rostedt

On Tue, 23 Jan 2024 17:48:17 +
Vincent Donnefort  wrote:

> > > + * @subbufs_touched: Number of subbufs that have been filled.
> > > + * @subbufs_lost:Number of subbufs lost to overrun.
> > > + * @subbufs_read:Number of subbufs that have been read.  
> > 
> > Do we actually need the above 3 fields?
> > 
> > What's the use case for them? I don't want to expose internals in the API
> > unless they are needed.  
> 
> subbufs_read is gone, I just forgot to remove it here :-\.
> 
> The two other ones are used for tracing with the hypervisor. That's why I
> preemptively added them. 
> 
> I can remove them and add just append this struct later ... or just overload
> this struct with another one, only shared between the kernel and the 
> hypervisor?

Yes, let's have them be different for now. Or we could just make them
"reserved" and say that they are not used for now.

I also think we should add a flags value too, so that when they are no
longer reserved, a flag can be set to say that they are active.

struct trace_buffer_meta {
__u32   meta_page_size;
__u32   meta_struct_len;

__u32   subbuf_size;
__u32   nr_subbufs;

struct {
__u64   lost_events;
__u32   id;
__u32   read;
} reader;

__u64   flags;

__u64   entries;
__u64   overrun;
__u64   read;

__u64   Reserved1;
__u64   Reserved2;
};

And that way the hypervisor would still have those available, and updated,
but they are not filled for user space. The hypervisor could even set the
flag field saying it's using them.

That is, the first flag is:

enum {
TRACE_BUFFER_META_FL_SUBUF_INFO = (1 << 0),
};

And if that's set, those fields are available. But we just don't set them
now ;-)

-- Steve

[PATCH 5/9] remoteproc: qcom_q6v5_pas: Use devm_rproc_alloc() helper

2024-01-23 Thread Andrew Davis

Use the device lifecycle managed allocation function. This helps prevent
mistakes like freeing out of order in cleanup functions and forgetting to
free on error paths.

Signed-off-by: Andrew Davis 
---
 drivers/remoteproc/qcom_q6v5_pas.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/remoteproc/qcom_q6v5_pas.c 
b/drivers/remoteproc/qcom_q6v5_pas.c
index a9dd58608052c..60b0cd30c0592 100644
--- a/drivers/remoteproc/qcom_q6v5_pas.c
+++ b/drivers/remoteproc/qcom_q6v5_pas.c
@@ -678,7 +678,7 @@ static int adsp_probe(struct platform_device *pdev)
if (desc->minidump_id)
ops = &adsp_minidump_ops;
 
-   rproc = rproc_alloc(&pdev->dev, pdev->name, ops, fw_name, 
sizeof(*adsp));
+   rproc = devm_rproc_alloc(&pdev->dev, pdev->name, ops, fw_name, 
sizeof(*adsp));
 
if (!rproc) {
dev_err(&pdev->dev, "unable to allocate remoteproc\n");
@@ -754,7 +754,6 @@ static int adsp_probe(struct platform_device *pdev)
adsp_pds_detach(adsp, adsp->proxy_pds, adsp->proxy_pd_count);
 free_rproc:
device_init_wakeup(adsp->dev, false);
-   rproc_free(rproc);
 
return ret;
 }
@@ -773,7 +772,6 @@ static void adsp_remove(struct platform_device *pdev)
qcom_remove_ssr_subdev(adsp->rproc, &adsp->ssr_subdev);
adsp_pds_detach(adsp, adsp->proxy_pds, adsp->proxy_pd_count);
device_init_wakeup(adsp->dev, false);
-   rproc_free(adsp->rproc);
 }
 
 static const struct adsp_data adsp_resource_init = {
-- 
2.39.2

[PATCH 1/9] remoteproc: imx_dsp_rproc: Use devm_rproc_alloc() helper

2024-01-23 Thread Andrew Davis

Use the device lifecycle managed allocation function. This helps prevent
mistakes like freeing out of order in cleanup functions and forgetting to
free on error paths.

Signed-off-by: Andrew Davis 
---
 drivers/remoteproc/imx_dsp_rproc.c | 11 ---
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/remoteproc/imx_dsp_rproc.c 
b/drivers/remoteproc/imx_dsp_rproc.c
index a1c62d15f16c6..56a799cb8b363 100644
--- a/drivers/remoteproc/imx_dsp_rproc.c
+++ b/drivers/remoteproc/imx_dsp_rproc.c
@@ -1104,8 +1104,8 @@ static int imx_dsp_rproc_probe(struct platform_device 
*pdev)
return ret;
}
 
-   rproc = rproc_alloc(dev, "imx-dsp-rproc", &imx_dsp_rproc_ops, fw_name,
-   sizeof(*priv));
+   rproc = devm_rproc_alloc(dev, "imx-dsp-rproc", &imx_dsp_rproc_ops,
+fw_name, sizeof(*priv));
if (!rproc)
return -ENOMEM;
 
@@ -1125,14 +1125,14 @@ static int imx_dsp_rproc_probe(struct platform_device 
*pdev)
ret = imx_dsp_rproc_detect_mode(priv);
if (ret) {
dev_err(dev, "failed on imx_dsp_rproc_detect_mode\n");
-   goto err_put_rproc;
+   return ret;
}
 
/* There are multiple power domains required by DSP on some platform */
ret = imx_dsp_attach_pm_domains(priv);
if (ret) {
dev_err(dev, "failed on imx_dsp_attach_pm_domains\n");
-   goto err_put_rproc;
+   return ret;
}
/* Get clocks */
ret = imx_dsp_rproc_clk_get(priv);
@@ -1155,8 +1155,6 @@ static int imx_dsp_rproc_probe(struct platform_device 
*pdev)
 
 err_detach_domains:
imx_dsp_detach_pm_domains(priv);
-err_put_rproc:
-   rproc_free(rproc);
 
return ret;
 }
@@ -1169,7 +1167,6 @@ static void imx_dsp_rproc_remove(struct platform_device 
*pdev)
pm_runtime_disable(&pdev->dev);
rproc_del(rproc);
imx_dsp_detach_pm_domains(priv);
-   rproc_free(rproc);
 }
 
 /* pm runtime functions */
-- 
2.39.2

[PATCH 6/9] remoteproc: qcom_q6v5_wcss: Use devm_rproc_alloc() helper

2024-01-23 Thread Andrew Davis

Use the device lifecycle managed allocation function. This helps prevent
mistakes like freeing out of order in cleanup functions and forgetting to
free on error paths.

Signed-off-by: Andrew Davis 
---
 drivers/remoteproc/qcom_q6v5_wcss.c | 24 +---
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/drivers/remoteproc/qcom_q6v5_wcss.c 
b/drivers/remoteproc/qcom_q6v5_wcss.c
index cff1fa07d1def..94f68c919ee62 100644
--- a/drivers/remoteproc/qcom_q6v5_wcss.c
+++ b/drivers/remoteproc/qcom_q6v5_wcss.c
@@ -1011,8 +1011,8 @@ static int q6v5_wcss_probe(struct platform_device *pdev)
if (!desc)
return -EINVAL;
 
-   rproc = rproc_alloc(&pdev->dev, pdev->name, desc->ops,
-   desc->firmware_name, sizeof(*wcss));
+   rproc = devm_rproc_alloc(&pdev->dev, pdev->name, desc->ops,
+desc->firmware_name, sizeof(*wcss));
if (!rproc) {
dev_err(&pdev->dev, "failed to allocate rproc\n");
return -ENOMEM;
@@ -1027,29 +1027,29 @@ static int q6v5_wcss_probe(struct platform_device *pdev)
 
ret = q6v5_wcss_init_mmio(wcss, pdev);
if (ret)
-   goto free_rproc;
+   return ret;
 
ret = q6v5_alloc_memory_region(wcss);
if (ret)
-   goto free_rproc;
+   return ret;
 
if (wcss->version == WCSS_QCS404) {
ret = q6v5_wcss_init_clock(wcss);
if (ret)
-   goto free_rproc;
+   return ret;
 
ret = q6v5_wcss_init_regulator(wcss);
if (ret)
-   goto free_rproc;
+   return ret;
}
 
ret = q6v5_wcss_init_reset(wcss, desc);
if (ret)
-   goto free_rproc;
+   return ret;
 
ret = qcom_q6v5_init(&wcss->q6v5, pdev, rproc, desc->crash_reason_smem, 
NULL, NULL);
if (ret)
-   goto free_rproc;
+   return ret;
 
qcom_add_glink_subdev(rproc, &wcss->glink_subdev, "q6wcss");
qcom_add_ssr_subdev(rproc, &wcss->ssr_subdev, "q6wcss");
@@ -1061,16 +1061,11 @@ static int q6v5_wcss_probe(struct platform_device *pdev)
 
ret = rproc_add(rproc);
if (ret)
-   goto free_rproc;
+   return ret;
 
platform_set_drvdata(pdev, rproc);
 
return 0;
-
-free_rproc:
-   rproc_free(rproc);
-
-   return ret;
 }
 
 static void q6v5_wcss_remove(struct platform_device *pdev)
@@ -1080,7 +1075,6 @@ static void q6v5_wcss_remove(struct platform_device *pdev)
 
qcom_q6v5_deinit(&wcss->q6v5);
rproc_del(rproc);
-   rproc_free(rproc);
 }
 
 static const struct wcss_data wcss_ipq8074_res_init = {
-- 
2.39.2

[PATCH 3/9] remoteproc: qcom_q6v5_adsp: Use devm_rproc_alloc() helper

2024-01-23 Thread Andrew Davis

Use the device lifecycle managed allocation function. This helps prevent
mistakes like freeing out of order in cleanup functions and forgetting to
free on error paths.

Signed-off-by: Andrew Davis 
---
 drivers/remoteproc/qcom_q6v5_adsp.c | 14 +-
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/remoteproc/qcom_q6v5_adsp.c 
b/drivers/remoteproc/qcom_q6v5_adsp.c
index 6c67514cc4931..34ac996a93b20 100644
--- a/drivers/remoteproc/qcom_q6v5_adsp.c
+++ b/drivers/remoteproc/qcom_q6v5_adsp.c
@@ -683,8 +683,8 @@ static int adsp_probe(struct platform_device *pdev)
return ret;
}
 
-   rproc = rproc_alloc(&pdev->dev, pdev->name, &adsp_ops,
-   firmware_name, sizeof(*adsp));
+   rproc = devm_rproc_alloc(&pdev->dev, pdev->name, &adsp_ops,
+firmware_name, sizeof(*adsp));
if (!rproc) {
dev_err(&pdev->dev, "unable to allocate remoteproc\n");
return -ENOMEM;
@@ -709,17 +709,17 @@ static int adsp_probe(struct platform_device *pdev)
 
ret = adsp_alloc_memory_region(adsp);
if (ret)
-   goto free_rproc;
+   return ret;
 
ret = adsp_init_clock(adsp, desc->clk_ids);
if (ret)
-   goto free_rproc;
+   return ret;
 
ret = qcom_rproc_pds_attach(adsp->dev, adsp,
desc->proxy_pd_names);
if (ret < 0) {
dev_err(&pdev->dev, "Failed to attach proxy power domains\n");
-   goto free_rproc;
+   return ret;
}
adsp->proxy_pd_count = ret;
 
@@ -755,9 +755,6 @@ static int adsp_probe(struct platform_device *pdev)
 disable_pm:
qcom_rproc_pds_detach(adsp, adsp->proxy_pds, adsp->proxy_pd_count);
 
-free_rproc:
-   rproc_free(rproc);
-
return ret;
 }
 
@@ -772,7 +769,6 @@ static void adsp_remove(struct platform_device *pdev)
qcom_remove_sysmon_subdev(adsp->sysmon);
qcom_remove_ssr_subdev(adsp->rproc, &adsp->ssr_subdev);
qcom_rproc_pds_detach(adsp, adsp->proxy_pds, adsp->proxy_pd_count);
-   rproc_free(adsp->rproc);
 }
 
 static const struct adsp_pil_data adsp_resource_init = {
-- 
2.39.2

[PATCH 7/9] remoteproc: qcom_wcnss: Use devm_rproc_alloc() helper

2024-01-23 Thread Andrew Davis

Use the device lifecycle managed allocation function. This helps prevent
mistakes like freeing out of order in cleanup functions and forgetting to
free on error paths.

Signed-off-by: Andrew Davis 
---
 drivers/remoteproc/qcom_wcnss.c | 17 ++---
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c
index 90de22c81da97..a7bb9da27029d 100644
--- a/drivers/remoteproc/qcom_wcnss.c
+++ b/drivers/remoteproc/qcom_wcnss.c
@@ -555,8 +555,8 @@ static int wcnss_probe(struct platform_device *pdev)
if (ret < 0 && ret != -EINVAL)
return ret;
 
-   rproc = rproc_alloc(&pdev->dev, pdev->name, &wcnss_ops,
-   fw_name, sizeof(*wcnss));
+   rproc = devm_rproc_alloc(&pdev->dev, pdev->name, &wcnss_ops,
+fw_name, sizeof(*wcnss));
if (!rproc) {
dev_err(&pdev->dev, "unable to allocate remoteproc\n");
return -ENOMEM;
@@ -574,14 +574,12 @@ static int wcnss_probe(struct platform_device *pdev)
mutex_init(&wcnss->iris_lock);
 
mmio = devm_platform_ioremap_resource_byname(pdev, "pmu");
-   if (IS_ERR(mmio)) {
-   ret = PTR_ERR(mmio);
-   goto free_rproc;
-   }
+   if (IS_ERR(mmio))
+   return PTR_ERR(mmio);
 
ret = wcnss_alloc_memory_region(wcnss);
if (ret)
-   goto free_rproc;
+   return ret;
 
wcnss->pmu_cfg = mmio + data->pmu_offset;
wcnss->spare_out = mmio + data->spare_offset;
@@ -592,7 +590,7 @@ static int wcnss_probe(struct platform_device *pdev)
 */
ret = wcnss_init_pds(wcnss, data->pd_names);
if (ret && (ret != -ENODATA || !data->num_pd_vregs))
-   goto free_rproc;
+   return ret;
 
ret = wcnss_init_regulators(wcnss, data->vregs, data->num_vregs,
data->num_pd_vregs);
@@ -656,8 +654,6 @@ static int wcnss_probe(struct platform_device *pdev)
qcom_iris_remove(wcnss->iris);
 detach_pds:
wcnss_release_pds(wcnss);
-free_rproc:
-   rproc_free(rproc);
 
return ret;
 }
@@ -673,7 +669,6 @@ static void wcnss_remove(struct platform_device *pdev)
qcom_remove_sysmon_subdev(wcnss->sysmon);
qcom_remove_smd_subdev(wcnss->rproc, &wcnss->smd_subdev);
wcnss_release_pds(wcnss);
-   rproc_free(wcnss->rproc);
 }
 
 static const struct of_device_id wcnss_of_match[] = {
-- 
2.39.2

[PATCH 8/9] remoteproc: st: Use devm_rproc_alloc() helper

2024-01-23 Thread Andrew Davis

Use the device lifecycle managed allocation function. This helps prevent
mistakes like freeing out of order in cleanup functions and forgetting to
free on error paths.

Signed-off-by: Andrew Davis 
---
 drivers/remoteproc/st_remoteproc.c | 15 +--
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/remoteproc/st_remoteproc.c 
b/drivers/remoteproc/st_remoteproc.c
index cb163766c56d5..1340be9d01101 100644
--- a/drivers/remoteproc/st_remoteproc.c
+++ b/drivers/remoteproc/st_remoteproc.c
@@ -347,23 +347,21 @@ static int st_rproc_probe(struct platform_device *pdev)
int enabled;
int ret, i;
 
-   rproc = rproc_alloc(dev, np->name, &st_rproc_ops, NULL, sizeof(*ddata));
+   rproc = devm_rproc_alloc(dev, np->name, &st_rproc_ops, NULL, 
sizeof(*ddata));
if (!rproc)
return -ENOMEM;
 
rproc->has_iommu = false;
ddata = rproc->priv;
ddata->config = (struct st_rproc_config *)device_get_match_data(dev);
-   if (!ddata->config) {
-   ret = -ENODEV;
-   goto free_rproc;
-   }
+   if (!ddata->config)
+   return -ENODEV;
 
platform_set_drvdata(pdev, rproc);
 
ret = st_rproc_parse_dt(pdev);
if (ret)
-   goto free_rproc;
+   return ret;
 
enabled = st_rproc_state(pdev);
if (enabled < 0) {
@@ -439,8 +437,7 @@ static int st_rproc_probe(struct platform_device *pdev)
mbox_free_channel(ddata->mbox_chan[i]);
 free_clk:
clk_unprepare(ddata->clk);
-free_rproc:
-   rproc_free(rproc);
+
return ret;
 }
 
@@ -456,8 +453,6 @@ static void st_rproc_remove(struct platform_device *pdev)
 
for (i = 0; i < ST_RPROC_MAX_VRING * MBOX_MAX; i++)
mbox_free_channel(ddata->mbox_chan[i]);
-
-   rproc_free(rproc);
 }
 
 static struct platform_driver st_rproc_driver = {
-- 
2.39.2

[PATCH 4/9] remoteproc: qcom_q6v5_mss: Use devm_rproc_alloc() helper

2024-01-23 Thread Andrew Davis

Use the device lifecycle managed allocation function. This helps prevent
mistakes like freeing out of order in cleanup functions and forgetting to
free on error paths.

Signed-off-by: Andrew Davis 
---
 drivers/remoteproc/qcom_q6v5_mss.c | 28 
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/drivers/remoteproc/qcom_q6v5_mss.c 
b/drivers/remoteproc/qcom_q6v5_mss.c
index 394b2c1cb5e21..1779fc890e102 100644
--- a/drivers/remoteproc/qcom_q6v5_mss.c
+++ b/drivers/remoteproc/qcom_q6v5_mss.c
@@ -1990,8 +1990,8 @@ static int q6v5_probe(struct platform_device *pdev)
return ret;
}
 
-   rproc = rproc_alloc(&pdev->dev, pdev->name, &q6v5_ops,
-   mba_image, sizeof(*qproc));
+   rproc = devm_rproc_alloc(&pdev->dev, pdev->name, &q6v5_ops,
+mba_image, sizeof(*qproc));
if (!rproc) {
dev_err(&pdev->dev, "failed to allocate rproc\n");
return -ENOMEM;
@@ -2008,7 +2008,7 @@ static int q6v5_probe(struct platform_device *pdev)
1, &qproc->hexagon_mdt_image);
if (ret < 0 && ret != -EINVAL) {
dev_err(&pdev->dev, "unable to read mpss firmware-name\n");
-   goto free_rproc;
+   return ret;
}
 
platform_set_drvdata(pdev, qproc);
@@ -2019,17 +2019,17 @@ static int q6v5_probe(struct platform_device *pdev)
qproc->has_spare_reg = desc->has_spare_reg;
ret = q6v5_init_mem(qproc, pdev);
if (ret)
-   goto free_rproc;
+   return ret;
 
ret = q6v5_alloc_memory_region(qproc);
if (ret)
-   goto free_rproc;
+   return ret;
 
ret = q6v5_init_clocks(&pdev->dev, qproc->proxy_clks,
   desc->proxy_clk_names);
if (ret < 0) {
dev_err(&pdev->dev, "Failed to get proxy clocks.\n");
-   goto free_rproc;
+   return ret;
}
qproc->proxy_clk_count = ret;
 
@@ -2037,7 +2037,7 @@ static int q6v5_probe(struct platform_device *pdev)
   desc->reset_clk_names);
if (ret < 0) {
dev_err(&pdev->dev, "Failed to get reset clocks.\n");
-   goto free_rproc;
+   return ret;
}
qproc->reset_clk_count = ret;
 
@@ -2045,7 +2045,7 @@ static int q6v5_probe(struct platform_device *pdev)
   desc->active_clk_names);
if (ret < 0) {
dev_err(&pdev->dev, "Failed to get active clocks.\n");
-   goto free_rproc;
+   return ret;
}
qproc->active_clk_count = ret;
 
@@ -2053,7 +2053,7 @@ static int q6v5_probe(struct platform_device *pdev)
  desc->proxy_supply);
if (ret < 0) {
dev_err(&pdev->dev, "Failed to get proxy regulators.\n");
-   goto free_rproc;
+   return ret;
}
qproc->proxy_reg_count = ret;
 
@@ -2061,7 +2061,7 @@ static int q6v5_probe(struct platform_device *pdev)
  desc->active_supply);
if (ret < 0) {
dev_err(&pdev->dev, "Failed to get active regulators.\n");
-   goto free_rproc;
+   return ret;
}
qproc->active_reg_count = ret;
 
@@ -2074,12 +2074,12 @@ static int q6v5_probe(struct platform_device *pdev)
  desc->fallback_proxy_supply);
if (ret < 0) {
dev_err(&pdev->dev, "Failed to get fallback proxy 
regulators.\n");
-   goto free_rproc;
+   return ret;
}
qproc->fallback_proxy_reg_count = ret;
} else if (ret < 0) {
dev_err(&pdev->dev, "Failed to init power domains\n");
-   goto free_rproc;
+   return ret;
} else {
qproc->proxy_pd_count = ret;
}
@@ -2127,8 +2127,6 @@ static int q6v5_probe(struct platform_device *pdev)
qcom_remove_glink_subdev(rproc, &qproc->glink_subdev);
 detach_proxy_pds:
q6v5_pds_detach(qproc, qproc->proxy_pds, qproc->proxy_pd_count);
-free_rproc:
-   rproc_free(rproc);
 
return ret;
 }
@@ -2149,8 +2147,6 @@ static void q6v5_remove(struct platform_device *pdev)
qcom_remove_glink_subdev(rproc, &qproc->glink_subdev);
 
q6v5_pds_detach(qproc, qproc->proxy_pds, qproc->proxy_pd_count);
-
-   rproc_free(rproc);
 }
 
 static const struct rproc_hexagon_res sc7180_mss = {
-- 
2.39.2

[PATCH 2/9] remoteproc: imx_rproc: Use devm_rproc_alloc() helper

2024-01-23 Thread Andrew Davis

Use the device lifecycle managed allocation function. This helps prevent
mistakes like freeing out of order in cleanup functions and forgetting to
free on error paths.

Signed-off-by: Andrew Davis 
---
 drivers/remoteproc/imx_rproc.c | 16 +---
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c
index 8bb293b9f327c..55ecce3ab5f75 100644
--- a/drivers/remoteproc/imx_rproc.c
+++ b/drivers/remoteproc/imx_rproc.c
@@ -1104,16 +1104,14 @@ static int imx_rproc_probe(struct platform_device *pdev)
int ret;
 
/* set some other name then imx */
-   rproc = rproc_alloc(dev, "imx-rproc", &imx_rproc_ops,
-   NULL, sizeof(*priv));
+   rproc = devm_rproc_alloc(dev, "imx-rproc", &imx_rproc_ops,
+NULL, sizeof(*priv));
if (!rproc)
return -ENOMEM;
 
dcfg = of_device_get_match_data(dev);
-   if (!dcfg) {
-   ret = -EINVAL;
-   goto err_put_rproc;
-   }
+   if (!dcfg)
+   return -EINVAL;
 
priv = rproc->priv;
priv->rproc = rproc;
@@ -1124,8 +1122,7 @@ static int imx_rproc_probe(struct platform_device *pdev)
priv->workqueue = create_workqueue(dev_name(dev));
if (!priv->workqueue) {
dev_err(dev, "cannot create workqueue\n");
-   ret = -ENOMEM;
-   goto err_put_rproc;
+   return -ENOMEM;
}
 
ret = imx_rproc_xtr_mbox_init(rproc);
@@ -1167,8 +1164,6 @@ static int imx_rproc_probe(struct platform_device *pdev)
imx_rproc_free_mbox(rproc);
 err_put_wkq:
destroy_workqueue(priv->workqueue);
-err_put_rproc:
-   rproc_free(rproc);
 
return ret;
 }
@@ -1183,7 +1178,6 @@ static void imx_rproc_remove(struct platform_device *pdev)
imx_rproc_put_scu(rproc);
imx_rproc_free_mbox(rproc);
destroy_workqueue(priv->workqueue);
-   rproc_free(rproc);
 }
 
 static const struct of_device_id imx_rproc_of_match[] = {
-- 
2.39.2

[PATCH 9/9] remoteproc: stm32: Use devm_rproc_alloc() helper

2024-01-23 Thread Andrew Davis

Use the device lifecycle managed allocation function. This helps prevent
mistakes like freeing out of order in cleanup functions and forgetting to
free on error paths.

Signed-off-by: Andrew Davis 
---
 drivers/remoteproc/stm32_rproc.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/remoteproc/stm32_rproc.c b/drivers/remoteproc/stm32_rproc.c
index 4f469f0bcf8b2..fed0866de1819 100644
--- a/drivers/remoteproc/stm32_rproc.c
+++ b/drivers/remoteproc/stm32_rproc.c
@@ -843,7 +843,7 @@ static int stm32_rproc_probe(struct platform_device *pdev)
if (ret)
return ret;
 
-   rproc = rproc_alloc(dev, np->name, &st_rproc_ops, NULL, sizeof(*ddata));
+   rproc = devm_rproc_alloc(dev, np->name, &st_rproc_ops, NULL, 
sizeof(*ddata));
if (!rproc)
return -ENOMEM;
 
@@ -897,7 +897,6 @@ static int stm32_rproc_probe(struct platform_device *pdev)
dev_pm_clear_wake_irq(dev);
device_init_wakeup(dev, false);
}
-   rproc_free(rproc);
return ret;
 }
 
@@ -918,7 +917,6 @@ static void stm32_rproc_remove(struct platform_device *pdev)
dev_pm_clear_wake_irq(dev);
device_init_wakeup(dev, false);
}
-   rproc_free(rproc);
 }
 
 static int stm32_rproc_suspend(struct device *dev)
-- 
2.39.2

[PATCH 1/8] remoteproc: k3-dsp: Use devm_rproc_alloc() helper

2024-01-23 Thread Andrew Davis

Use the device lifecycle managed allocation function. This helps prevent
mistakes like freeing out of order in cleanup functions and forgetting to
free on error paths.

Signed-off-by: Andrew Davis 
---
 drivers/remoteproc/ti_k3_dsp_remoteproc.c | 16 +---
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/drivers/remoteproc/ti_k3_dsp_remoteproc.c 
b/drivers/remoteproc/ti_k3_dsp_remoteproc.c
index ab882e3b7130b..93fbc89307d6a 100644
--- a/drivers/remoteproc/ti_k3_dsp_remoteproc.c
+++ b/drivers/remoteproc/ti_k3_dsp_remoteproc.c
@@ -690,8 +690,8 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev)
if (ret)
return dev_err_probe(dev, ret, "failed to parse firmware-name 
property\n");
 
-   rproc = rproc_alloc(dev, dev_name(dev), &k3_dsp_rproc_ops, fw_name,
-   sizeof(*kproc));
+   rproc = devm_rproc_alloc(dev, dev_name(dev), &k3_dsp_rproc_ops,
+fw_name, sizeof(*kproc));
if (!rproc)
return -ENOMEM;
 
@@ -707,12 +707,9 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev)
kproc->data = data;
 
kproc->ti_sci = ti_sci_get_by_phandle(np, "ti,sci");
-   if (IS_ERR(kproc->ti_sci)) {
-   ret = dev_err_probe(dev, PTR_ERR(kproc->ti_sci),
-   "failed to get ti-sci handle\n");
-   kproc->ti_sci = NULL;
-   goto free_rproc;
-   }
+   if (IS_ERR(kproc->ti_sci))
+   return dev_err_probe(dev, PTR_ERR(kproc->ti_sci),
+"failed to get ti-sci handle\n");
 
ret = of_property_read_u32(np, "ti,sci-dev-id", &kproc->ti_sci_id);
if (ret) {
@@ -810,8 +807,6 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev)
ret1 = ti_sci_put_handle(kproc->ti_sci);
if (ret1)
dev_err(dev, "failed to put ti_sci handle (%pe)\n", 
ERR_PTR(ret1));
-free_rproc:
-   rproc_free(rproc);
return ret;
 }
 
@@ -844,7 +839,6 @@ static void k3_dsp_rproc_remove(struct platform_device 
*pdev)
dev_err(dev, "failed to put ti_sci handle (%pe)\n", 
ERR_PTR(ret));
 
k3_dsp_reserved_mem_exit(kproc);
-   rproc_free(kproc->rproc);
 }
 
 static const struct k3_dsp_mem_data c66_mems[] = {
-- 
2.39.2

[PATCH 2/8] remoteproc: k3-dsp: Add devm action to release reserved memory

2024-01-23 Thread Andrew Davis

Use a device lifecycle managed action to release reserved memory. This
helps prevent mistakes like releasing out of order in cleanup functions
and forgetting to release on error paths.

Signed-off-by: Andrew Davis 
---
 drivers/remoteproc/ti_k3_dsp_remoteproc.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/remoteproc/ti_k3_dsp_remoteproc.c 
b/drivers/remoteproc/ti_k3_dsp_remoteproc.c
index 93fbc89307d6a..0cb00146fe977 100644
--- a/drivers/remoteproc/ti_k3_dsp_remoteproc.c
+++ b/drivers/remoteproc/ti_k3_dsp_remoteproc.c
@@ -550,6 +550,13 @@ static int k3_dsp_rproc_of_get_memories(struct 
platform_device *pdev,
return 0;
 }
 
+static void k3_dsp_mem_release(void *data)
+{
+   struct device *dev = data;
+
+   of_reserved_mem_device_release(dev);
+}
+
 static int k3_dsp_reserved_mem_init(struct k3_dsp_rproc *kproc)
 {
struct device *dev = kproc->dev;
@@ -579,13 +586,14 @@ static int k3_dsp_reserved_mem_init(struct k3_dsp_rproc 
*kproc)
ERR_PTR(ret));
return ret;
}
+   ret = devm_add_action_or_reset(dev, k3_dsp_mem_release, dev);
+   if (ret)
+   return ret;
 
num_rmems--;
kproc->rmem = kcalloc(num_rmems, sizeof(*kproc->rmem), GFP_KERNEL);
-   if (!kproc->rmem) {
-   ret = -ENOMEM;
-   goto release_rmem;
-   }
+   if (!kproc->rmem)
+   return -ENOMEM;
 
/* use remaining reserved memory regions for static carveouts */
for (i = 0; i < num_rmems; i++) {
@@ -628,8 +636,6 @@ static int k3_dsp_reserved_mem_init(struct k3_dsp_rproc 
*kproc)
for (i--; i >= 0; i--)
iounmap(kproc->rmem[i].cpu_addr);
kfree(kproc->rmem);
-release_rmem:
-   of_reserved_mem_device_release(kproc->dev);
return ret;
 }
 
@@ -640,8 +646,6 @@ static void k3_dsp_reserved_mem_exit(struct k3_dsp_rproc 
*kproc)
for (i = 0; i < kproc->num_rmems; i++)
iounmap(kproc->rmem[i].cpu_addr);
kfree(kproc->rmem);
-
-   of_reserved_mem_device_release(kproc->dev);
 }
 
 static
-- 
2.39.2

[PATCH 3/8] remoteproc: k3-dsp: Use devm_kcalloc() helper

2024-01-23 Thread Andrew Davis

Use a device lifecycle managed action to free memory. This helps prevent
mistakes like freeing out of order in cleanup functions and forgetting to
free on error paths.

Signed-off-by: Andrew Davis 
---
 drivers/remoteproc/ti_k3_dsp_remoteproc.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/remoteproc/ti_k3_dsp_remoteproc.c 
b/drivers/remoteproc/ti_k3_dsp_remoteproc.c
index 0cb00146fe977..a13552c71f440 100644
--- a/drivers/remoteproc/ti_k3_dsp_remoteproc.c
+++ b/drivers/remoteproc/ti_k3_dsp_remoteproc.c
@@ -591,7 +591,7 @@ static int k3_dsp_reserved_mem_init(struct k3_dsp_rproc 
*kproc)
return ret;
 
num_rmems--;
-   kproc->rmem = kcalloc(num_rmems, sizeof(*kproc->rmem), GFP_KERNEL);
+   kproc->rmem = devm_kcalloc(dev, num_rmems, sizeof(*kproc->rmem), 
GFP_KERNEL);
if (!kproc->rmem)
return -ENOMEM;
 
@@ -635,7 +635,6 @@ static int k3_dsp_reserved_mem_init(struct k3_dsp_rproc 
*kproc)
 unmap_rmem:
for (i--; i >= 0; i--)
iounmap(kproc->rmem[i].cpu_addr);
-   kfree(kproc->rmem);
return ret;
 }
 
@@ -645,7 +644,6 @@ static void k3_dsp_reserved_mem_exit(struct k3_dsp_rproc 
*kproc)
 
for (i = 0; i < kproc->num_rmems; i++)
iounmap(kproc->rmem[i].cpu_addr);
-   kfree(kproc->rmem);
 }
 
 static
-- 
2.39.2

[PATCH 4/8] remoteproc: k3-dsp: Use devm_ti_sci_get_by_phandle() helper

2024-01-23 Thread Andrew Davis

Use the device lifecycle managed TI-SCI get() function. This helps prevent
mistakes like not put()'ing in the wrong order in cleanup functions and
forgetting to put() on error paths.

Signed-off-by: Andrew Davis 
---
 drivers/remoteproc/ti_k3_dsp_remoteproc.c | 33 +++
 1 file changed, 9 insertions(+), 24 deletions(-)

diff --git a/drivers/remoteproc/ti_k3_dsp_remoteproc.c 
b/drivers/remoteproc/ti_k3_dsp_remoteproc.c
index a13552c71f440..0240340a83e90 100644
--- a/drivers/remoteproc/ti_k3_dsp_remoteproc.c
+++ b/drivers/remoteproc/ti_k3_dsp_remoteproc.c
@@ -682,7 +682,6 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev)
const char *fw_name;
bool p_state = false;
int ret = 0;
-   int ret1;
 
data = of_device_get_match_data(dev);
if (!data)
@@ -708,30 +707,24 @@ static int k3_dsp_rproc_probe(struct platform_device 
*pdev)
kproc->dev = dev;
kproc->data = data;
 
-   kproc->ti_sci = ti_sci_get_by_phandle(np, "ti,sci");
+   kproc->ti_sci = devm_ti_sci_get_by_phandle(dev, "ti,sci");
if (IS_ERR(kproc->ti_sci))
return dev_err_probe(dev, PTR_ERR(kproc->ti_sci),
 "failed to get ti-sci handle\n");
 
ret = of_property_read_u32(np, "ti,sci-dev-id", &kproc->ti_sci_id);
-   if (ret) {
-   dev_err_probe(dev, ret, "missing 'ti,sci-dev-id' property\n");
-   goto put_sci;
-   }
+   if (ret)
+   return dev_err_probe(dev, ret, "missing 'ti,sci-dev-id' 
property\n");
 
kproc->reset = devm_reset_control_get_exclusive(dev, NULL);
-   if (IS_ERR(kproc->reset)) {
-   ret = dev_err_probe(dev, PTR_ERR(kproc->reset),
-   "failed to get reset\n");
-   goto put_sci;
-   }
+   if (IS_ERR(kproc->reset))
+   return dev_err_probe(dev, PTR_ERR(kproc->reset),
+"failed to get reset\n");
 
kproc->tsp = k3_dsp_rproc_of_get_tsp(dev, kproc->ti_sci);
-   if (IS_ERR(kproc->tsp)) {
-   ret = dev_err_probe(dev, PTR_ERR(kproc->tsp),
-   "failed to construct ti-sci proc 
control\n");
-   goto put_sci;
-   }
+   if (IS_ERR(kproc->tsp))
+   return dev_err_probe(dev, PTR_ERR(kproc->tsp),
+"failed to construct ti-sci proc 
control\n");
 
ret = ti_sci_proc_request(kproc->tsp);
if (ret < 0) {
@@ -805,10 +798,6 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev)
dev_err(dev, "failed to release proc (%pe)\n", ERR_PTR(ret1));
 free_tsp:
kfree(kproc->tsp);
-put_sci:
-   ret1 = ti_sci_put_handle(kproc->ti_sci);
-   if (ret1)
-   dev_err(dev, "failed to put ti_sci handle (%pe)\n", 
ERR_PTR(ret1));
return ret;
 }
 
@@ -836,10 +825,6 @@ static void k3_dsp_rproc_remove(struct platform_device 
*pdev)
 
kfree(kproc->tsp);
 
-   ret = ti_sci_put_handle(kproc->ti_sci);
-   if (ret)
-   dev_err(dev, "failed to put ti_sci handle (%pe)\n", 
ERR_PTR(ret));
-
k3_dsp_reserved_mem_exit(kproc);
 }
 
-- 
2.39.2

[PATCH 7/8] remoteproc: k3-dsp: Use devm_ioremap_wc() helper

2024-01-23 Thread Andrew Davis

Use a device lifecycle managed ioremap helper function. This helps prevent
mistakes like unmapping out of order in cleanup functions and forgetting
to unmap on all error paths.

Signed-off-by: Andrew Davis 
---
 drivers/remoteproc/ti_k3_dsp_remoteproc.c | 48 +--
 1 file changed, 10 insertions(+), 38 deletions(-)

diff --git a/drivers/remoteproc/ti_k3_dsp_remoteproc.c 
b/drivers/remoteproc/ti_k3_dsp_remoteproc.c
index 800c8c6767086..f799f74734b4a 100644
--- a/drivers/remoteproc/ti_k3_dsp_remoteproc.c
+++ b/drivers/remoteproc/ti_k3_dsp_remoteproc.c
@@ -598,16 +598,13 @@ static int k3_dsp_reserved_mem_init(struct k3_dsp_rproc 
*kproc)
/* use remaining reserved memory regions for static carveouts */
for (i = 0; i < num_rmems; i++) {
rmem_np = of_parse_phandle(np, "memory-region", i + 1);
-   if (!rmem_np) {
-   ret = -EINVAL;
-   goto unmap_rmem;
-   }
+   if (!rmem_np)
+   return -EINVAL;
 
rmem = of_reserved_mem_lookup(rmem_np);
if (!rmem) {
of_node_put(rmem_np);
-   ret = -EINVAL;
-   goto unmap_rmem;
+   return -EINVAL;
}
of_node_put(rmem_np);
 
@@ -615,12 +612,11 @@ static int k3_dsp_reserved_mem_init(struct k3_dsp_rproc 
*kproc)
/* 64-bit address regions currently not supported */
kproc->rmem[i].dev_addr = (u32)rmem->base;
kproc->rmem[i].size = rmem->size;
-   kproc->rmem[i].cpu_addr = ioremap_wc(rmem->base, rmem->size);
+   kproc->rmem[i].cpu_addr = devm_ioremap_wc(dev, rmem->base, 
rmem->size);
if (!kproc->rmem[i].cpu_addr) {
dev_err(dev, "failed to map reserved memory#%d at %pa 
of size %pa\n",
i + 1, &rmem->base, &rmem->size);
-   ret = -ENOMEM;
-   goto unmap_rmem;
+   return -ENOMEM;
}
 
dev_dbg(dev, "reserved memory%d: bus addr %pa size 0x%zx va %pK 
da 0x%x\n",
@@ -631,19 +627,6 @@ static int k3_dsp_reserved_mem_init(struct k3_dsp_rproc 
*kproc)
kproc->num_rmems = num_rmems;
 
return 0;
-
-unmap_rmem:
-   for (i--; i >= 0; i--)
-   iounmap(kproc->rmem[i].cpu_addr);
-   return ret;
-}
-
-static void k3_dsp_reserved_mem_exit(struct k3_dsp_rproc *kproc)
-{
-   int i;
-
-   for (i = 0; i < kproc->num_rmems; i++)
-   iounmap(kproc->rmem[i].cpu_addr);
 }
 
 static void k3_dsp_release_tsp(void *data)
@@ -752,10 +735,8 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev)
 
ret = kproc->ti_sci->ops.dev_ops.is_on(kproc->ti_sci, kproc->ti_sci_id,
   NULL, &p_state);
-   if (ret) {
-   dev_err_probe(dev, ret, "failed to get initial state, mode 
cannot be determined\n");
-   goto release_mem;
-   }
+   if (ret)
+   return dev_err_probe(dev, ret, "failed to get initial state, 
mode cannot be determined\n");
 
/* configure J721E devices for either remoteproc or IPC-only mode */
if (p_state) {
@@ -779,8 +760,7 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev)
if (data->uses_lreset) {
ret = reset_control_status(kproc->reset);
if (ret < 0) {
-   dev_err_probe(dev, ret, "failed to get reset 
status\n");
-   goto release_mem;
+   return dev_err_probe(dev, ret, "failed to get 
reset status\n");
} else if (ret == 0) {
dev_warn(dev, "local reset is deasserted for 
device\n");
k3_dsp_rproc_reset(kproc);
@@ -789,18 +769,12 @@ static int k3_dsp_rproc_probe(struct platform_device 
*pdev)
}
 
ret = rproc_add(rproc);
-   if (ret) {
-   dev_err_probe(dev, ret, "failed to add register device with 
remoteproc core\n");
-   goto release_mem;
-   }
+   if (ret)
+   return dev_err_probe(dev, ret, "failed to add register device 
with remoteproc core\n");
 
platform_set_drvdata(pdev, kproc);
 
return 0;
-
-release_mem:
-   k3_dsp_reserved_mem_exit(kproc);
-   return ret;
 }
 
 static void k3_dsp_rproc_remove(struct platform_device *pdev)
@@ -820,8 +794,6 @@ static void k3_dsp_rproc_remove(struct platform_device 
*pdev)
}
 
rproc_del(kproc->rproc);
-
-   k3_dsp_reserved_mem_exit(kproc);
 }
 
 static const struct k3_dsp_mem_data c66_mems[] = {
-- 
2.39.2

[PATCH 6/8] remoteproc: k3-dsp: Add devm action to release tsp

2024-01-23 Thread Andrew Davis

Use a device lifecycle managed action to release tps ti_sci_proc handle.
This helps prevent mistakes like releasing out of order in cleanup
functions and forgetting to release on error paths.

Signed-off-by: Andrew Davis 
---
 drivers/remoteproc/ti_k3_dsp_remoteproc.c | 26 +++
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/remoteproc/ti_k3_dsp_remoteproc.c 
b/drivers/remoteproc/ti_k3_dsp_remoteproc.c
index 2aac25d013985..800c8c6767086 100644
--- a/drivers/remoteproc/ti_k3_dsp_remoteproc.c
+++ b/drivers/remoteproc/ti_k3_dsp_remoteproc.c
@@ -646,6 +646,13 @@ static void k3_dsp_reserved_mem_exit(struct k3_dsp_rproc 
*kproc)
iounmap(kproc->rmem[i].cpu_addr);
 }
 
+static void k3_dsp_release_tsp(void *data)
+{
+   struct ti_sci_proc *tsp = data;
+
+   ti_sci_proc_release(tsp);
+}
+
 static
 struct ti_sci_proc *k3_dsp_rproc_of_get_tsp(struct device *dev,
const struct ti_sci_handle *sci)
@@ -731,16 +738,17 @@ static int k3_dsp_rproc_probe(struct platform_device 
*pdev)
dev_err_probe(dev, ret, "ti_sci_proc_request failed\n");
return ret;
}
+   ret = devm_add_action_or_reset(dev, k3_dsp_release_tsp, kproc->tsp);
+   if (ret)
+   return ret;
 
ret = k3_dsp_rproc_of_get_memories(pdev, kproc);
if (ret)
-   goto release_tsp;
+   return ret;
 
ret = k3_dsp_reserved_mem_init(kproc);
-   if (ret) {
-   dev_err_probe(dev, ret, "reserved memory init failed\n");
-   goto release_tsp;
-   }
+   if (ret)
+   return dev_err_probe(dev, ret, "reserved memory init failed\n");
 
ret = kproc->ti_sci->ops.dev_ops.is_on(kproc->ti_sci, kproc->ti_sci_id,
   NULL, &p_state);
@@ -792,10 +800,6 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev)
 
 release_mem:
k3_dsp_reserved_mem_exit(kproc);
-release_tsp:
-   ret1 = ti_sci_proc_release(kproc->tsp);
-   if (ret1)
-   dev_err(dev, "failed to release proc (%pe)\n", ERR_PTR(ret1));
return ret;
 }
 
@@ -817,10 +821,6 @@ static void k3_dsp_rproc_remove(struct platform_device 
*pdev)
 
rproc_del(kproc->rproc);
 
-   ret = ti_sci_proc_release(kproc->tsp);
-   if (ret)
-   dev_err(dev, "failed to release proc (%pe)\n", ERR_PTR(ret));
-
k3_dsp_reserved_mem_exit(kproc);
 }
 
-- 
2.39.2

[PATCH 8/8] remoteproc: k3-dsp: Use devm_rproc_add() helper

2024-01-23 Thread Andrew Davis

Use device lifecycle managed devm_rproc_add() helper function. This helps
prevent mistakes like deleting out of order in cleanup functions and
forgetting to delete on all error paths.

Signed-off-by: Andrew Davis 
---
 drivers/remoteproc/ti_k3_dsp_remoteproc.c | 9 ++---
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/remoteproc/ti_k3_dsp_remoteproc.c 
b/drivers/remoteproc/ti_k3_dsp_remoteproc.c
index f799f74734b4a..3555b535b1683 100644
--- a/drivers/remoteproc/ti_k3_dsp_remoteproc.c
+++ b/drivers/remoteproc/ti_k3_dsp_remoteproc.c
@@ -768,7 +768,7 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev)
}
}
 
-   ret = rproc_add(rproc);
+   ret = devm_rproc_add(dev, rproc);
if (ret)
return dev_err_probe(dev, ret, "failed to add register device 
with remoteproc core\n");
 
@@ -786,14 +786,9 @@ static void k3_dsp_rproc_remove(struct platform_device 
*pdev)
 
if (rproc->state == RPROC_ATTACHED) {
ret = rproc_detach(rproc);
-   if (ret) {
-   /* Note this error path leaks resources */
+   if (ret)
dev_err(dev, "failed to detach proc (%pe)\n", 
ERR_PTR(ret));
-   return;
-   }
}
-
-   rproc_del(kproc->rproc);
 }
 
 static const struct k3_dsp_mem_data c66_mems[] = {
-- 
2.39.2

[PATCH 5/8] remoteproc: k3-dsp: Use devm_kzalloc() helper

2024-01-23 Thread Andrew Davis

Use device lifecycle managed devm_kzalloc() helper function. This helps
prevent mistakes like freeing out of order in cleanup functions and
forgetting to free on all error paths.

Signed-off-by: Andrew Davis 
---
 drivers/remoteproc/ti_k3_dsp_remoteproc.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/remoteproc/ti_k3_dsp_remoteproc.c 
b/drivers/remoteproc/ti_k3_dsp_remoteproc.c
index 0240340a83e90..2aac25d013985 100644
--- a/drivers/remoteproc/ti_k3_dsp_remoteproc.c
+++ b/drivers/remoteproc/ti_k3_dsp_remoteproc.c
@@ -659,7 +659,7 @@ struct ti_sci_proc *k3_dsp_rproc_of_get_tsp(struct device 
*dev,
if (ret < 0)
return ERR_PTR(ret);
 
-   tsp = kzalloc(sizeof(*tsp), GFP_KERNEL);
+   tsp = devm_kzalloc(dev, sizeof(*tsp), GFP_KERNEL);
if (!tsp)
return ERR_PTR(-ENOMEM);
 
@@ -729,7 +729,7 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev)
ret = ti_sci_proc_request(kproc->tsp);
if (ret < 0) {
dev_err_probe(dev, ret, "ti_sci_proc_request failed\n");
-   goto free_tsp;
+   return ret;
}
 
ret = k3_dsp_rproc_of_get_memories(pdev, kproc);
@@ -796,8 +796,6 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev)
ret1 = ti_sci_proc_release(kproc->tsp);
if (ret1)
dev_err(dev, "failed to release proc (%pe)\n", ERR_PTR(ret1));
-free_tsp:
-   kfree(kproc->tsp);
return ret;
 }
 
@@ -823,8 +821,6 @@ static void k3_dsp_rproc_remove(struct platform_device 
*pdev)
if (ret)
dev_err(dev, "failed to release proc (%pe)\n", ERR_PTR(ret));
 
-   kfree(kproc->tsp);
-
k3_dsp_reserved_mem_exit(kproc);
 }
 
-- 
2.39.2

Re: [PATCH 0/2] remoteproc: stm32: Fix sparse warnings

2024-01-23 Thread Mathieu Poirier

On Wed, Jan 17, 2024 at 02:53:10PM +0100, Arnaud Pouliquen wrote:
> Fix warnings reported by sparse using make option "C=1"
> 
> Arnaud Pouliquen (2):
>   remoteproc: stm32: Fix incorrect type in assignment for va
>   remoteproc: stm32: Fix incorrect type assignment returned by
> stm32_rproc_get_loaded_rsc_tablef
> 
>  drivers/remoteproc/stm32_rproc.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>

I have applied this set.

Thanks,
Mathieu

> 
> base-commit: 0dd3ee31125508cd67f7e7172247f05b7fd1753a
> -- 
> 2.25.1
>

[PATCH 3/3] arm64: dts: qcom: msm8953: add reset for display subsystem

2024-01-23 Thread Luca Weiss

From: Vladimir Lypak 

With this reset we can avoid situations like IRQ storms from DSI host
before it even started probing (because boot-loader left DSI IRQs on).

Signed-off-by: Vladimir Lypak 
Signed-off-by: Luca Weiss 
---
 arch/arm64/boot/dts/qcom/msm8953.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/msm8953.dtsi 
b/arch/arm64/boot/dts/qcom/msm8953.dtsi
index ad2f8cf9c966..dcb5c98b793c 100644
--- a/arch/arm64/boot/dts/qcom/msm8953.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8953.dtsi
@@ -859,6 +859,8 @@ mdss: display-subsystem@1a0 {
  "vsync",
  "core";
 
+   resets = <&gcc GCC_MDSS_BCR>;
+
#address-cells = <1>;
#size-cells = <1>;
ranges;

-- 
2.43.0

[PATCH 2/3] clk: qcom: gcc-msm8953: add MDSS_BCR reset

2024-01-23 Thread Luca Weiss

From: Vladimir Lypak 

Add an entry in the gcc driver for the MDSS_BCR reset found on MSM8953.

Signed-off-by: Vladimir Lypak 
[luca: expand commit message, move entry]
Signed-off-by: Luca Weiss 
---
 drivers/clk/qcom/gcc-msm8953.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/clk/qcom/gcc-msm8953.c b/drivers/clk/qcom/gcc-msm8953.c
index 3e5a8cb14d4d..5725857faae6 100644
--- a/drivers/clk/qcom/gcc-msm8953.c
+++ b/drivers/clk/qcom/gcc-msm8953.c
@@ -4171,6 +4171,7 @@ static const struct qcom_reset_map gcc_msm8953_resets[] = 
{
[GCC_USB3PHY_PHY_BCR]   = { 0x3f03c },
[GCC_USB3_PHY_BCR]  = { 0x3f034 },
[GCC_USB_30_BCR]= { 0x3f070 },
+   [GCC_MDSS_BCR]  = { 0x4d074 },
 };
 
 static const struct regmap_config gcc_msm8953_regmap_config = {

-- 
2.43.0

[PATCH 1/3] dt-bindings: clock: gcc-msm8953: add reset for MDSS subsystem

2024-01-23 Thread Luca Weiss

From: Vladimir Lypak 

Add a new define for the GCC_MDSS_BCR found on MSM8953.

Signed-off-by: Vladimir Lypak 
[luca: expand commit message]
Signed-off-by: Luca Weiss 
---
 include/dt-bindings/clock/qcom,gcc-msm8953.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/dt-bindings/clock/qcom,gcc-msm8953.h 
b/include/dt-bindings/clock/qcom,gcc-msm8953.h
index 783162da6148..28090f9d7f24 100644
--- a/include/dt-bindings/clock/qcom,gcc-msm8953.h
+++ b/include/dt-bindings/clock/qcom,gcc-msm8953.h
@@ -218,6 +218,7 @@
 #define GCC_USB3PHY_PHY_BCR3
 #define GCC_USB3_PHY_BCR   4
 #define GCC_USB_30_BCR 5
+#define GCC_MDSS_BCR   6
 
 /* GDSCs */
 #define CPP_GDSC   0

-- 
2.43.0

[PATCH 0/3] Add MDSS_BCR reset for MSM8953

2024-01-23 Thread Luca Weiss

Add the MDSS_BCR reset that is found in the GCC of MSM8953 so we can
make sure the MDSS gets properly reset before Linux starts using it.

Signed-off-by: Luca Weiss 
---
Vladimir Lypak (3):
  dt-bindings: clock: gcc-msm8953: add reset for MDSS subsystem
  clk: qcom: gcc-msm8953: add MDSS_BCR reset
  arm64: dts: qcom: msm8953: add reset for display subsystem

 arch/arm64/boot/dts/qcom/msm8953.dtsi| 2 ++
 drivers/clk/qcom/gcc-msm8953.c   | 1 +
 include/dt-bindings/clock/qcom,gcc-msm8953.h | 1 +
 3 files changed, 4 insertions(+)
---
base-commit: 6613476e225e090cc9aad49be7fa504e290dd33d
change-id: 20240123-msm8953-mdss-reset-68308a03fff5

Best regards,
-- 
Luca Weiss

[PATCH 0/4] tracing/user_events: Introduce multi-format events

2024-01-23 Thread Beau Belgrave

Currently user_events supports 1 event with the same name and must have
the exact same format when referenced by multiple programs. This opens
an opportunity for malicous or poorly thought through programs to
create events that others use with different formats. Another scenario
is user programs wishing to use the same event name but add more fields
later when the software updates. Various versions of a program may be
running side-by-side, which is prevented by the current single format
requirement.

Add a new register flag (USER_EVENT_REG_MULTI_FORMAT) which indicates
the user program wishes to use the same user_event name, but may have
several different formats of the event in the future. When this flag is
used, create the underlying tracepoint backing the user_event with a
unique name per-version of the format. It's important that existing ABI
users do not get this logic automatically, even if one of the multi
format events matches the format. This ensures existing programs that
create events and assume the tracepoint name will match exactly continue
to work as expected. Add logic to only check multi-format events with
other multi-format events and single-format events to only check
single-format events during find.

Add a register_name (reg_name) to the user_event struct which allows for
split naming of events. We now have the name that was used to register
within user_events as well as the unique name for the tracepoint. Upon
registering events ensure matches based on first the reg_name, followed
by the fields and format of the event. This allows for multiple events
with the same registered name to have different formats. The underlying
tracepoint will have a unique name in the format of {reg_name}:[unique_id].
The unique_id is the time, in nanoseconds, of the event creation converted
to hex. Since this is done under the register mutex, it is extremely
unlikely for these IDs to ever match. It's also very unlikely a malicious
program could consistently guess what the name would be and attempt to
squat on it via the single format ABI.

For example, if both "test u32 value" and "test u64 value" are used with
the USER_EVENT_REG_MULTI_FORMAT the system would have 2 unique
tracepoints. The dynamic_events file would then show the following:
  u:test u64 count
  u:test u32 count

The actual tracepoint names look like this:
  test:[d5874fdac44]
  test:[d5914662cd4]

Deleting events via "!u:test u64 count" would only delete the first
tracepoint that matched that format. When the delete ABI is used all
events with the same name will be attempted to be deleted. If
per-version deletion is required, user programs should either not use
persistent events or delete them via dynamic_events.

Beau Belgrave (4):
  tracing/user_events: Prepare find/delete for same name events
  tracing/user_events: Introduce multi-format events
  selftests/user_events: Test multi-format events
  tracing/user_events: Document multi-format flag

 Documentation/trace/user_events.rst   |  23 +-
 include/uapi/linux/user_events.h  |   6 +-
 kernel/trace/trace_events_user.c  | 224 +-
 .../testing/selftests/user_events/abi_test.c  | 134 +++
 4 files changed, 325 insertions(+), 62 deletions(-)


base-commit: 610a9b8f49fbcf1100716370d3b5f6f884a2835a
-- 
2.34.1

[PATCH 1/4] tracing/user_events: Prepare find/delete for same name events

2024-01-23 Thread Beau Belgrave

The current code for finding and deleting events assumes that there will
never be cases when user_events are registered with the same name, but
different formats. In the future this scenario will exist to ensure
user programs can be updated or modify their events and run different
versions of their programs side-by-side without being blocked.

This change does not yet allow for multi-format events. If user_events
are registered with the same name but different arguments the programs
see the same return values as before. This change simply makes it
possible to easily accomodate for this in future changes.

Update find_user_event() to take in argument parameters and register
flags to accomodate future multi-format event scenarios. Have find
validate argument matching and return error pointers to cover address
in use cases, or allocation errors. Update callers to handle error
pointer logic.

Move delete_user_event() to use hash walking directly now that find has
changed. Delete all events found that match the register name, stop
if an error occurs and report back to the user.

Update user_fields_match() to cover list_empty() scenarios instead of
each callsite doing it now that find_user_event() uses it directly.

Signed-off-by: Beau Belgrave 
---
 kernel/trace/trace_events_user.c | 106 +--
 1 file changed, 58 insertions(+), 48 deletions(-)

diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index 9365ce407426..0480579ba563 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -202,6 +202,8 @@ static struct user_event_mm *user_event_mm_get(struct 
user_event_mm *mm);
 static struct user_event_mm *user_event_mm_get_all(struct user_event *user);
 static void user_event_mm_put(struct user_event_mm *mm);
 static int destroy_user_event(struct user_event *user);
+static bool user_fields_match(struct user_event *user, int argc,
+ const char **argv);
 
 static u32 user_event_key(char *name)
 {
@@ -1493,17 +1495,24 @@ static int destroy_user_event(struct user_event *user)
 }
 
 static struct user_event *find_user_event(struct user_event_group *group,
- char *name, u32 *outkey)
+ char *name, int argc, const char 
**argv,
+ u32 flags, u32 *outkey)
 {
struct user_event *user;
u32 key = user_event_key(name);
 
*outkey = key;
 
-   hash_for_each_possible(group->register_table, user, node, key)
-   if (!strcmp(EVENT_NAME(user), name))
+   hash_for_each_possible(group->register_table, user, node, key) {
+   if (strcmp(EVENT_NAME(user), name))
+   continue;
+
+   if (user_fields_match(user, argc, argv))
return user_event_get(user);
 
+   return ERR_PTR(-EADDRINUSE);
+   }
+
return NULL;
 }
 
@@ -1860,6 +1869,9 @@ static bool user_fields_match(struct user_event *user, 
int argc,
struct list_head *head = &user->fields;
int i = 0;
 
+   if (argc == 0)
+   return list_empty(head);
+
list_for_each_entry_reverse(field, head, link) {
if (!user_field_match(field, argc, argv, &i))
return false;
@@ -1880,10 +1892,8 @@ static bool user_event_match(const char *system, const 
char *event,
match = strcmp(EVENT_NAME(user), event) == 0 &&
(!system || strcmp(system, USER_EVENTS_SYSTEM) == 0);
 
-   if (match && argc > 0)
+   if (match)
match = user_fields_match(user, argc, argv);
-   else if (match && argc == 0)
-   match = list_empty(&user->fields);
 
return match;
 }
@@ -1922,11 +1932,11 @@ static int user_event_parse(struct user_event_group 
*group, char *name,
char *args, char *flags,
struct user_event **newuser, int reg_flags)
 {
-   int ret;
-   u32 key;
struct user_event *user;
+   char **argv = NULL;
int argc = 0;
-   char **argv;
+   int ret;
+   u32 key;
 
/* Currently don't support any text based flags */
if (flags != NULL)
@@ -1935,41 +1945,34 @@ static int user_event_parse(struct user_event_group 
*group, char *name,
if (!user_event_capable(reg_flags))
return -EPERM;
 
+   if (args) {
+   argv = argv_split(GFP_KERNEL, args, &argc);
+
+   if (!argv)
+   return -ENOMEM;
+   }
+
/* Prevent dyn_event from racing */
mutex_lock(&event_mutex);
-   user = find_user_event(group, name, &key);
+   user = find_user_event(group, name, argc, (const char **)argv,
+  reg_flags, &key);
mutex_unlock(&event_mutex);
 
-   if (user) {
-   if (args) {
-

[PATCH 4/4] tracing/user_events: Document multi-format flag

2024-01-23 Thread Beau Belgrave

User programs can now ask user_events to handle the synchronization of
multiple different formats for an event with the same name via the new
USER_EVENT_REG_MULTI_FORMAT flag.

Add a section for USER_EVENT_REG_MULTI_FORMAT that explains the intended
purpose and caveats of using it. Explain how deletion works in these
cases and how to use /sys/kernel/tracing/dynamic_events for per-version
deletion.

Signed-off-by: Beau Belgrave 
---
 Documentation/trace/user_events.rst | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/Documentation/trace/user_events.rst 
b/Documentation/trace/user_events.rst
index d8f12442aaa6..35a2524bc73c 100644
--- a/Documentation/trace/user_events.rst
+++ b/Documentation/trace/user_events.rst
@@ -92,6 +92,20 @@ The following flags are currently supported.
   process closes or unregisters the event. Requires CAP_PERFMON otherwise
   -EPERM is returned.
 
++ USER_EVENT_REG_MULTI_FORMAT: The event can contain multiple formats. This
+  allows programs to prevent themselves from being blocked when their event
+  format changes and they wish to use the same name. When this flag is used the
+  tracepoint name will be in the new format of "name:[unique_id]" vs the older
+  format of "name". A tracepoint will be created for each unique pair of name
+  and format. This means if several processes use the same name and format,
+  they will use the same tracepoint. If yet another process uses the same name,
+  but a different format than the other processes, it will use a different
+  tracepoint with a new unique id. Recording programs need to scan tracefs for
+  the various different formats of the event name they are interested in
+  recording. The system name of the tracepoint will also use 
"user_events_multi"
+  instead of "user_events". This prevents single-format event names conflicting
+  with any multi-format event names within tracefs.
+
 Upon successful registration the following is set.
 
 + write_index: The index to use for this file descriptor that represents this
@@ -106,6 +120,9 @@ or perf record -e user_events:[name] when 
attaching/recording.
 **NOTE:** The event subsystem name by default is "user_events". Callers should
 not assume it will always be "user_events". Operators reserve the right in the
 future to change the subsystem name per-process to accommodate event isolation.
+In addition if the USER_EVENT_REG_MULTI_FORMAT flag is used the tracepoint name
+will have a unique id appended to it and the system name will be
+"user_events_multi" as described above.
 
 Command Format
 ^^
@@ -156,7 +173,11 @@ to request deletes than the one used for registration due 
to this.
 to the event. If programs do not want auto-delete, they must use the
 USER_EVENT_REG_PERSIST flag when registering the event. Once that flag is used
 the event exists until DIAG_IOCSDEL is invoked. Both register and delete of an
-event that persists requires CAP_PERFMON, otherwise -EPERM is returned.
+event that persists requires CAP_PERFMON, otherwise -EPERM is returned. When
+there are multiple formats of the same event name, all events with the same
+name will be attempted to be deleted. If only a specific version is wanted to
+be deleted then the /sys/kernel/tracing/dynamic_events file should be used for
+that specific format of the event.
 
 Unregistering
 -
-- 
2.34.1

[PATCH 3/4] selftests/user_events: Test multi-format events

2024-01-23 Thread Beau Belgrave

User_events now has multi-format events which allow for the same
register name, but with different formats. When this occurs, different
tracepoints are created with unique names.

Add a new test that ensures the same name can be used for two different
formats. Ensure they are isolated from each other and that name and arg
matching still works if yet another register comes in with the same
format as one of the two.

Signed-off-by: Beau Belgrave 
---
 .../testing/selftests/user_events/abi_test.c  | 134 ++
 1 file changed, 134 insertions(+)

diff --git a/tools/testing/selftests/user_events/abi_test.c 
b/tools/testing/selftests/user_events/abi_test.c
index cef1ff1af223..b3480fad65a5 100644
--- a/tools/testing/selftests/user_events/abi_test.c
+++ b/tools/testing/selftests/user_events/abi_test.c
@@ -16,6 +16,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 
 #include "../kselftest_harness.h"
@@ -23,6 +25,62 @@
 
 const char *data_file = "/sys/kernel/tracing/user_events_data";
 const char *enable_file = 
"/sys/kernel/tracing/events/user_events/__abi_event/enable";
+const char *multi_dir_glob = 
"/sys/kernel/tracing/events/user_events_multi/__abi_event:*";
+
+static int wait_for_delete(char *dir)
+{
+   struct stat buf;
+   int i;
+
+   for (i = 0; i < 1; ++i) {
+   if (stat(dir, &buf) == -1 && errno == ENOENT)
+   return 0;
+
+   usleep(1000);
+   }
+
+   return -1;
+}
+
+static int find_multi_event_dir(char *unique_field, char *out_dir, int dir_len)
+{
+   char path[256];
+   glob_t buf;
+   int i, ret;
+
+   ret = glob(multi_dir_glob, GLOB_ONLYDIR, NULL, &buf);
+
+   if (ret)
+   return -1;
+
+   ret = -1;
+
+   for (i = 0; i < buf.gl_pathc; ++i) {
+   FILE *fp;
+
+   snprintf(path, sizeof(path), "%s/format", buf.gl_pathv[i]);
+   fp = fopen(path, "r");
+
+   if (!fp)
+   continue;
+
+   while (fgets(path, sizeof(path), fp) != NULL) {
+   if (strstr(path, unique_field)) {
+   fclose(fp);
+   /* strscpy is not available, use snprintf */
+   snprintf(out_dir, dir_len, "%s", 
buf.gl_pathv[i]);
+   ret = 0;
+   goto out;
+   }
+   }
+
+   fclose(fp);
+   }
+out:
+   globfree(&buf);
+
+   return ret;
+}
 
 static bool event_exists(void)
 {
@@ -74,6 +132,39 @@ static int event_delete(void)
return ret;
 }
 
+static int reg_enable_multi(void *enable, int size, int bit, int flags,
+   char *args)
+{
+   struct user_reg reg = {0};
+   char full_args[512] = {0};
+   int fd = open(data_file, O_RDWR);
+   int len;
+   int ret;
+
+   if (fd < 0)
+   return -1;
+
+   len = snprintf(full_args, sizeof(full_args), "__abi_event %s", args);
+
+   if (len > sizeof(full_args)) {
+   ret = -E2BIG;
+   goto out;
+   }
+
+   reg.size = sizeof(reg);
+   reg.name_args = (__u64)full_args;
+   reg.flags = USER_EVENT_REG_MULTI_FORMAT | flags;
+   reg.enable_bit = bit;
+   reg.enable_addr = (__u64)enable;
+   reg.enable_size = size;
+
+   ret = ioctl(fd, DIAG_IOCSREG, ®);
+out:
+   close(fd);
+
+   return ret;
+}
+
 static int reg_enable_flags(void *enable, int size, int bit, int flags)
 {
struct user_reg reg = {0};
@@ -207,6 +298,49 @@ TEST_F(user, bit_sizes) {
ASSERT_NE(0, reg_enable(&self->check, 128, 0));
 }
 
+TEST_F(user, multi_format) {
+   char first_dir[256];
+   char second_dir[256];
+   struct stat buf;
+
+   /* Multiple formats for the same name should work */
+   ASSERT_EQ(0, reg_enable_multi(&self->check, sizeof(int), 0,
+ 0, "u32 multi_first"));
+
+   ASSERT_EQ(0, reg_enable_multi(&self->check, sizeof(int), 1,
+ 0, "u64 multi_second"));
+
+   /* Same name with same format should also work */
+   ASSERT_EQ(0, reg_enable_multi(&self->check, sizeof(int), 2,
+ 0, "u64 multi_second"));
+
+   ASSERT_EQ(0, find_multi_event_dir("multi_first",
+ first_dir, sizeof(first_dir)));
+
+   ASSERT_EQ(0, find_multi_event_dir("multi_second",
+ second_dir, sizeof(second_dir)));
+
+   /* Should not be found in the same dir */
+   ASSERT_NE(0, strcmp(first_dir, second_dir));
+
+   /* First dir should still exist */
+   ASSERT_EQ(0, stat(first_dir, &buf));
+
+   /* Disabling first register should remove first dir */
+   ASSERT_EQ(0, reg_disable(&self->check, 0));
+   ASSERT_EQ(0, wait_for_delete(first_dir));
+

[PATCH 2/4] tracing/user_events: Introduce multi-format events

2024-01-23 Thread Beau Belgrave

Currently user_events supports 1 event with the same name and must have
the exact same format when referenced by multiple programs. This opens
an opportunity for malicous or poorly thought through programs to
create events that others use with different formats. Another scenario
is user programs wishing to use the same event name but add more fields
later when the software updates. Various versions of a program may be
running side-by-side, which is prevented by the current single format
requirement.

Add a new register flag (USER_EVENT_REG_MULTI_FORMAT) which indicates
the user program wishes to use the same user_event name, but may have
several different formats of the event in the future. When this flag is
used, create the underlying tracepoint backing the user_event with a
unique name per-version of the format. It's important that existing ABI
users do not get this logic automatically, even if one of the multi
format events matches the format. This ensures existing programs that
create events and assume the tracepoint name will match exactly continue
to work as expected. Add logic to only check multi-format events with
other multi-format events and single-format events to only check
single-format events during find.

Change system name of the multi-format event tracepoint to ensure that
multi-format events are isolated completely from single-format events.

Add a register_name (reg_name) to the user_event struct which allows for
split naming of events. We now have the name that was used to register
within user_events as well as the unique name for the tracepoint. Upon
registering events ensure matches based on first the reg_name, followed
by the fields and format of the event. This allows for multiple events
with the same registered name to have different formats. The underlying
tracepoint will have a unique name in the format of {reg_name}:[unique_id].

For example, if both "test u32 value" and "test u64 value" are used with
the USER_EVENT_REG_MULTI_FORMAT the system would have 2 unique
tracepoints. The dynamic_events file would then show the following:
  u:test u64 count
  u:test u32 count

The actual tracepoint names look like this:
  test:[d5874fdac44]
  test:[d5914662cd4]

Both would be under the new user_events_multi system name to prevent the
older ABI from being used to squat on multi-formatted events and block
their use.

Deleting events via "!u:test u64 count" would only delete the first
tracepoint that matched that format. When the delete ABI is used all
events with the same name will be attempted to be deleted. If
per-version deletion is required, user programs should either not use
persistent events or delete them via dynamic_events.

Signed-off-by: Beau Belgrave 
---
 include/uapi/linux/user_events.h |   6 +-
 kernel/trace/trace_events_user.c | 118 +++
 2 files changed, 111 insertions(+), 13 deletions(-)

diff --git a/include/uapi/linux/user_events.h b/include/uapi/linux/user_events.h
index f74f3aedd49c..a03de03dccbc 100644
--- a/include/uapi/linux/user_events.h
+++ b/include/uapi/linux/user_events.h
@@ -12,6 +12,7 @@
 #include 
 
 #define USER_EVENTS_SYSTEM "user_events"
+#define USER_EVENTS_MULTI_SYSTEM "user_events_multi"
 #define USER_EVENTS_PREFIX "u:"
 
 /* Create dynamic location entry within a 32-bit value */
@@ -22,8 +23,11 @@ enum user_reg_flag {
/* Event will not delete upon last reference closing */
USER_EVENT_REG_PERSIST  = 1U << 0,
 
+   /* Event will be allowed to have multiple formats */
+   USER_EVENT_REG_MULTI_FORMAT = 1U << 1,
+
/* This value or above is currently non-ABI */
-   USER_EVENT_REG_MAX  = 1U << 1,
+   USER_EVENT_REG_MAX  = 1U << 2,
 };
 
 /*
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index 0480579ba563..f9c0781285b6 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -34,7 +34,8 @@
 
 /* Limit how long of an event name plus args within the subsystem. */
 #define MAX_EVENT_DESC 512
-#define EVENT_NAME(user_event) ((user_event)->tracepoint.name)
+#define EVENT_NAME(user_event) ((user_event)->reg_name)
+#define EVENT_TP_NAME(user_event) ((user_event)->tracepoint.name)
 #define MAX_FIELD_ARRAY_SIZE 1024
 
 /*
@@ -54,10 +55,13 @@
  * allows isolation for events by various means.
  */
 struct user_event_group {
-   char*system_name;
-   struct  hlist_node node;
-   struct  mutex reg_mutex;
+   char*system_name;
+   char*system_multi_name;
+   struct hlist_node   node;
+   struct mutexreg_mutex;
DECLARE_HASHTABLE(register_table, 8);
+   /* ID that moves forward within the group for multi-event names */
+   u64 multi_id;
 };
 
 /* Group for init_user_ns mapping, top-most group */
@@ -78,6 +82,7 @@ static unsigned int current_user_events;
  */
 str

[PATCH] tracing: Include PPIN in mce_record tracepoint

2024-01-23 Thread Avadhut Naik

Machine Check Error information from struct mce is exported to userspace
through the mce_record tracepoint.

Currently, however, the PPIN (Protected Processor Inventory Number) field
of struct mce is not exported through the tracepoint.

Export PPIN through the tracepoint as it may provide useful information
for debug and analysis.

Signed-off-by: Avadhut Naik 
---
 include/trace/events/mce.h | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
index 1391ada0da3b..657b93ec8176 100644
--- a/include/trace/events/mce.h
+++ b/include/trace/events/mce.h
@@ -25,6 +25,7 @@ TRACE_EVENT(mce_record,
__field(u64,ipid)
__field(u64,ip  )
__field(u64,tsc )
+   __field(u64,ppin)
__field(u64,walltime)
__field(u32,cpu )
__field(u32,cpuid   )
@@ -45,6 +46,7 @@ TRACE_EVENT(mce_record,
__entry->ipid   = m->ipid;
__entry->ip = m->ip;
__entry->tsc= m->tsc;
+   __entry->ppin   = m->ppin;
__entry->walltime   = m->time;
__entry->cpu= m->extcpu;
__entry->cpuid  = m->cpuid;
@@ -55,7 +57,7 @@ TRACE_EVENT(mce_record,
__entry->cpuvendor  = m->cpuvendor;
),
 
-   TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, 
ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: 
%u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
+   TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, 
ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PPIN: 
%llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
__entry->cpu,
__entry->mcgcap, __entry->mcgstatus,
__entry->bank, __entry->status,
@@ -63,6 +65,7 @@ TRACE_EVENT(mce_record,
__entry->addr, __entry->misc, __entry->synd,
__entry->cs, __entry->ip,
__entry->tsc,
+   __entry->ppin,
__entry->cpuvendor, __entry->cpuid,
__entry->walltime,
__entry->socketid,

base-commit: 451b2bc29430fa147e36a48348f8b6b615fd6820
-- 
2.34.1

Re: [PATCH] tracing: Include PPIN in mce_record tracepoint

2024-01-23 Thread Tony Luck

On Tue, Jan 23, 2024 at 05:51:50PM -0600, Avadhut Naik wrote:
> Machine Check Error information from struct mce is exported to userspace
> through the mce_record tracepoint.
> 
> Currently, however, the PPIN (Protected Processor Inventory Number) field
> of struct mce is not exported through the tracepoint.
> 
> Export PPIN through the tracepoint as it may provide useful information
> for debug and analysis.

Awesome. I've been meaning to update the tracepoint for ages, but
it never gets to the top of the queue.

But some questions:

1) Are tracepoints a user visible ABI? Adding a new field in the middle
feels like it might be problematic. I asked this question many years
ago and Steven Rostedt said there was some tracing library in the works
that would make this OK for appplications using that library.

2) While you are adding to the tracepoint, should we batch up all
the useful changes that have been made to "struct mce". I think the
new fields that might be of use are:

__u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
__u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
__u64 ppin; /* Protected Processor Inventory Number */
__u32 microcode;/* Microcode revision */

> 
> Signed-off-by: Avadhut Naik 
> ---
>  include/trace/events/mce.h | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
> index 1391ada0da3b..657b93ec8176 100644
> --- a/include/trace/events/mce.h
> +++ b/include/trace/events/mce.h
> @@ -25,6 +25,7 @@ TRACE_EVENT(mce_record,
>   __field(u64,ipid)
>   __field(u64,ip  )
>   __field(u64,tsc )
> + __field(u64,ppin)
>   __field(u64,walltime)
>   __field(u32,cpu )
>   __field(u32,cpuid   )
> @@ -45,6 +46,7 @@ TRACE_EVENT(mce_record,
>   __entry->ipid   = m->ipid;
>   __entry->ip = m->ip;
>   __entry->tsc= m->tsc;
> + __entry->ppin   = m->ppin;
>   __entry->walltime   = m->time;
>   __entry->cpu= m->extcpu;
>   __entry->cpuid  = m->cpuid;
> @@ -55,7 +57,7 @@ TRACE_EVENT(mce_record,
>   __entry->cpuvendor  = m->cpuvendor;
>   ),

... rest of patch trimmed.

-Tony

[PATCH] tracing: Include PPIN in mce_record tracepoint

2024-01-23 Thread Naik, Avadhut

Hi,

On 1/23/2024 6:12 PM, Tony Luck wrote:
> On Tue, Jan 23, 2024 at 05:51:50PM -0600, Avadhut Naik wrote:
>> Machine Check Error information from struct mce is exported to userspace
>> through the mce_record tracepoint.
>>
>> Currently, however, the PPIN (Protected Processor Inventory Number) field
>> of struct mce is not exported through the tracepoint.
>>
>> Export PPIN through the tracepoint as it may provide useful information
>> for debug and analysis.
> 
> Awesome. I've been meaning to update the tracepoint for ages, but
> it never gets to the top of the queue.
> 
> But some questions:
> 
> 1) Are tracepoints a user visible ABI? Adding a new field in the middle
> feels like it might be problematic. I asked this question many years
> ago and Steven Rostedt said there was some tracing library in the works
> that would make this OK for appplications using that library.
> 

I think they can be user visible through the "trace" and "trace_pipe" in
/sys/kernel/debug/tracing. But you will have to enable the events you want
to trace through /sys/kernel/debug/tracing/events//enable.

AFAIK, this (adding field in the middle) shouldn't be problematic as we
have the tracepoint format available in debugfs. For e.g. with this patch,
the format is as follows:

[root avadnaik]# cat /sys/kernel/debug/tracing/events/mce/mce_record/format 
name: mce_record
ID: 113
format:
field:unsigned short common_type;   offset:0;   size:2; 
signed:0;
field:unsigned char common_flags;   offset:2;   size:1; 
signed:0;
field:unsigned char common_preempt_count;   offset:3;   size:1; 
signed:0;
field:int common_pid;   offset:4;   size:4; signed:1;

field:u64 mcgcap;   offset:8;   size:8; signed:0;
field:u64 mcgstatus;offset:16;  size:8; signed:0;
field:u64 status;   offset:24;  size:8; signed:0;
field:u64 addr; offset:32;  size:8; signed:0;
field:u64 misc; offset:40;  size:8; signed:0;
field:u64 synd; offset:48;  size:8; signed:0;
field:u64 ipid; offset:56;  size:8; signed:0;
field:u64 ip;   offset:64;  size:8; signed:0;
field:u64 tsc;  offset:72;  size:8; signed:0;
field:u64 ppin; offset:80;  size:8; signed:0;
field:u64 walltime; offset:88;  size:8; signed:0;
field:u32 cpu;  offset:96;  size:4; signed:0;
field:u32 cpuid;offset:100; size:4; signed:0;
field:u32 apicid;   offset:104; size:4; signed:0;
field:u32 socketid; offset:108; size:4; signed:0;
field:u8 cs;offset:112; size:1; signed:0;
field:u8 bank;  offset:113; size:1; signed:0;
field:u8 cpuvendor; offset:114; size:1; signed:0;

print fmt: "CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, 
ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PPIN: 
%llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", REC->cpu, 
REC->mcgcap, REC->mcgstatus, REC->bank, REC->status, REC->ipid, REC->addr, 
REC->misc, REC->synd, REC->cs, REC->ip, REC->tsc, REC->ppin, REC->cpuvendor, 
REC->cpuid, REC->walltime, REC->socketid, REC->apicid


Just quickly tried with rasdaemon and things seem to be okay.

Also, not a cent percent sure, but the library you are mentioning of, I think
its the libtraceevent library and IIUC, it utilizes the above tracepoint format.

> 2) While you are adding to the tracepoint, should we batch up all
> the useful changes that have been made to "struct mce". I think the
> new fields that might be of use are:
> 
> __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
> __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
> __u64 ppin; /* Protected Processor Inventory Number */
> __u32 microcode;/* Microcode revision */
> 

synd and ipid are already a part of mce_record tracepoint. (They too have been
added in the middle). Will add the microcode field in the next version.

>>
>> Signed-off-by: Avadhut Naik 
>> ---
>>  include/trace/events/mce.h | 5 -
>>  1 file changed, 4 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
>> index 1391ada0da3b..657b93ec8176 100644
>> --- a/include/trace/events/mce.h
>> +++ b/include/trace/events/mce.h
>> @@ -25,6 +25,7 @@ TRACE_EVENT(mce_record,
>>  __field(u64,ipid)
>>  __field(u64,ip  )
>>  __field(u64,tsc )
>> +__field(u64,ppin)
>>  __field(u64,walltime)
>>  __field(u32,cpu )
>>  __field(u32,cpuid   )
>> @@ -45,6 +46,7 @@ TRACE_EVENT(mce_record,
>>  __entry->ipid

Re: [PATCH v4 7/7] selftests/ftrace: add test cases for VFS type "%pd" and "%pD"

2024-01-23 Thread Google

On Tue, 23 Jan 2024 17:21:39 +0800
Ye Bin  wrote:

> This patch adds test cases for new print format type "%pd/%pD".The test cases
> test the following items:
> 1. Test README if add "%pd/%pD" type;
> 2. Test "%pd" type for dput();
> 3. Test "%pD" type for vfs_read();
> 
> Signed-off-by: Ye Bin 
> ---
>  .../ftrace/test.d/kprobe/kprobe_args_vfs.tc   | 79 +++
>  1 file changed, 79 insertions(+)
>  create mode 100644 
> tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc
> 
> diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc 
> b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc
> new file mode 100644
> index ..1d8edd294dd6
> --- /dev/null
> +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc
> @@ -0,0 +1,79 @@
> +#!/bin/sh
> +# SPDX-License-Identifier: GPL-2.0
> +# description: Kprobe event VFS type argument
> +# requires: kprobe_events
> +
> +case `uname -m` in
> +x86_64)
> +  ARG1=%di
> +;;
> +i[3456]86)
> +  ARG1=%ax
> +;;
> +aarch64)
> +  ARG1=%x0
> +;;
> +arm*)
> +  ARG1=%r0
> +;;
> +ppc64*)
> +  ARG1=%r3
> +;;
> +ppc*)
> +  ARG1=%r3

You can merge this ppc* and ppc64* cases :)

> +;;
> +s390*)
> +  ARG1=%r2
> +;;
> +mips*)
> +  ARG1=%r4
> +;;
> +loongarch*)
> +  ARG1=%r4
> +;;
> +riscv*)
> +  ARG1=%a0

Anyway, I wonder why don't you use '$arg1' instead of these registers.
Is there any reason?

Thank you,

> +;;
> +*)
> +  echo "Please implement other architecture here"
> +  exit_untested
> +esac
> +
> +: "Test argument %pd/%pD in README"
> +grep -q "%pd/%pD" README
> +
> +: "Test argument %pd with name"
> +echo "p:testprobe dput name=${ARG1}:%pd" > kprobe_events
> +echo 1 > events/kprobes/testprobe/enable
> +grep -q "1" events/kprobes/testprobe/enable
> +echo 0 > events/kprobes/testprobe/enable
> +grep "dput" trace | grep -q "enable"
> +echo "" > kprobe_events
> +echo "" > trace
> +
> +: "Test argument %pd without name"
> +echo "p:testprobe dput ${ARG1}:%pd" > kprobe_events
> +echo 1 > events/kprobes/testprobe/enable
> +grep -q "1" events/kprobes/testprobe/enable
> +echo 0 > events/kprobes/testprobe/enable
> +grep "dput" trace | grep -q "enable"
> +echo "" > kprobe_events
> +echo "" > trace
> +
> +: "Test argument %pD with name"
> +echo "p:testprobe vfs_read name=${ARG1}:%pD" > kprobe_events
> +echo 1 > events/kprobes/testprobe/enable
> +grep -q "1" events/kprobes/testprobe/enable
> +echo 0 > events/kprobes/testprobe/enable
> +grep "vfs_read" trace | grep -q "enable"
> +echo "" > kprobe_events
> +echo "" > trace
> +
> +: "Test argument %pD without name"
> +echo "p:testprobe vfs_read ${ARG1}:%pD" > kprobe_events
> +echo 1 > events/kprobes/testprobe/enable
> +grep -q "1"  events/kprobes/testprobe/enable
> +echo 0 > events/kprobes/testprobe/enable
> +grep "vfs_read" trace | grep -q "enable"
> +echo "" > kprobe_events
> +echo "" > trace
> -- 
> 2.31.1
> 


-- 
Masami Hiramatsu (Google)

Re: [PATCH] tracing: Include PPIN in mce_record tracepoint

2024-01-23 Thread Steven Rostedt

On Tue, 23 Jan 2024 19:29:52 -0600
"Naik, Avadhut"  wrote:

> > But some questions:
> > 
> > 1) Are tracepoints a user visible ABI? Adding a new field in the middle
> > feels like it might be problematic. I asked this question many years
> > ago and Steven Rostedt said there was some tracing library in the works
> > that would make this OK for appplications using that library.
> >   
> 
> I think they can be user visible through the "trace" and "trace_pipe" in
> /sys/kernel/debug/tracing. But you will have to enable the events you want
> to trace through /sys/kernel/debug/tracing/events//enable.
> 
> AFAIK, this (adding field in the middle) shouldn't be problematic as we
> have the tracepoint format available in debugfs. For e.g. with this patch,
> the format is as follows:
> 
> [root avadnaik]# cat /sys/kernel/debug/tracing/events/mce/mce_record/format 
> name: mce_record
> ID: 113
> format:
> field:unsigned short common_type;   offset:0;   size:2; 
> signed:0;
> field:unsigned char common_flags;   offset:2;   size:1; 
> signed:0;
> field:unsigned char common_preempt_count;   offset:3;   
> size:1; signed:0;
> field:int common_pid;   offset:4;   size:4; signed:1;
> 
> field:u64 mcgcap;   offset:8;   size:8; signed:0;
> field:u64 mcgstatus;offset:16;  size:8; signed:0;
> field:u64 status;   offset:24;  size:8; signed:0;
> field:u64 addr; offset:32;  size:8; signed:0;
> field:u64 misc; offset:40;  size:8; signed:0;
> field:u64 synd; offset:48;  size:8; signed:0;
> field:u64 ipid; offset:56;  size:8; signed:0;
> field:u64 ip;   offset:64;  size:8; signed:0;
> field:u64 tsc;  offset:72;  size:8; signed:0;
> field:u64 ppin; offset:80;  size:8; signed:0;
> field:u64 walltime; offset:88;  size:8; signed:0;
> field:u32 cpu;  offset:96;  size:4; signed:0;
> field:u32 cpuid;offset:100; size:4; signed:0;
> field:u32 apicid;   offset:104; size:4; signed:0;
> field:u32 socketid; offset:108; size:4; signed:0;
> field:u8 cs;offset:112; size:1; signed:0;
> field:u8 bank;  offset:113; size:1; signed:0;
> field:u8 cpuvendor; offset:114; size:1; signed:0;
> 
> print fmt: "CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, 
> ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PPIN: 
> %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", REC->cpu, 
> REC->mcgcap, REC->mcgstatus, REC->bank, REC->status, REC->ipid, REC->addr, 
> REC->misc, REC->synd, REC->cs, REC->ip, REC->tsc, REC->ppin, REC->cpuvendor, 
> REC->cpuid, REC->walltime, REC->socketid, REC->apicid
> 
> 
> Just quickly tried with rasdaemon and things seem to be okay.
> 
> Also, not a cent percent sure, but the library you are mentioning of, I think
> its the libtraceevent library and IIUC, it utilizes the above tracepoint 
> format.
> 

Yes, rasdaemon uses libtraceevent (or a copy of it internally) that
reads the format file to find fields. You can safely add fields to the
middle of the event structure and the parsing will be just fine.

-- Steve

Re: [PATCH v4 7/7] selftests/ftrace: add test cases for VFS type "%pd" and "%pD"

2024-01-23 Thread yebin (H)





On 2024/1/24 9:32, Masami Hiramatsu (Google) wrote:

On Tue, 23 Jan 2024 17:21:39 +0800
Ye Bin  wrote:


This patch adds test cases for new print format type "%pd/%pD".The test cases
test the following items:
1. Test README if add "%pd/%pD" type;
2. Test "%pd" type for dput();
3. Test "%pD" type for vfs_read();

Signed-off-by: Ye Bin 
---
  .../ftrace/test.d/kprobe/kprobe_args_vfs.tc   | 79 +++
  1 file changed, 79 insertions(+)
  create mode 100644 
tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc 
b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc
new file mode 100644
index ..1d8edd294dd6
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc
@@ -0,0 +1,79 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event VFS type argument
+# requires: kprobe_events
+
+case `uname -m` in
+x86_64)
+  ARG1=%di
+;;
+i[3456]86)
+  ARG1=%ax
+;;
+aarch64)
+  ARG1=%x0
+;;
+arm*)
+  ARG1=%r0
+;;
+ppc64*)
+  ARG1=%r3
+;;
+ppc*)
+  ARG1=%r3

You can merge this ppc* and ppc64* cases :)


+;;
+s390*)
+  ARG1=%r2
+;;
+mips*)
+  ARG1=%r4
+;;
+loongarch*)
+  ARG1=%r4
+;;
+riscv*)
+  ARG1=%a0

Anyway, I wonder why don't you use '$arg1' instead of these registers.
Is there any reason?

Thank you,

Thank you for your advice.
Actually, I wrote the test case by referring to " 
kprobe_args_string.tc". I'll modify it

according to your suggestion.


+;;
+*)
+  echo "Please implement other architecture here"
+  exit_untested
+esac
+
+: "Test argument %pd/%pD in README"
+grep -q "%pd/%pD" README
+
+: "Test argument %pd with name"
+echo "p:testprobe dput name=${ARG1}:%pd" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1" events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "dput" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
+
+: "Test argument %pd without name"
+echo "p:testprobe dput ${ARG1}:%pd" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1" events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "dput" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
+
+: "Test argument %pD with name"
+echo "p:testprobe vfs_read name=${ARG1}:%pD" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1" events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "vfs_read" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
+
+: "Test argument %pD without name"
+echo "p:testprobe vfs_read ${ARG1}:%pD" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1"  events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "vfs_read" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
--
2.31.1

Re: [PATCH] virtio_net: Support RX hash XDP hint

2024-01-23 Thread Liang Chen

On Mon, Jan 22, 2024 at 7:10 PM Heng Qi  wrote:
>
> Hi Liang Chen,
>
> 在 2024/1/22 下午6:22, Liang Chen 写道:
> > The RSS hash report is a feature that's part of the virtio specification.
> > Currently, virtio backends like qemu, vdpa (mlx5), and potentially vhost
> > (still a work in progress as per [1]) support this feature. While the
> > capability to obtain the RSS hash has been enabled in the normal path,
> > it's currently missing in the XDP path. Therefore, we are introducing XDP
> > hints through kfuncs to allow XDP programs to access the RSS hash.
> >
> > Signed-off-by: Liang Chen 
> > ---
> >   drivers/net/virtio_net.c | 56 
> >   1 file changed, 56 insertions(+)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index d7ce4a1011ea..1463a4709e3c 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -4579,6 +4579,60 @@ static void virtnet_set_big_packets(struct 
> > virtnet_info *vi, const int mtu)
> >   }
> >   }
> >
> > +static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
> > +enum xdp_rss_hash_type *rss_type)
> > +{
> > + const struct xdp_buff *xdp = (void *)_ctx;
> > + struct virtio_net_hdr_v1_hash *hdr_hash;
> > + struct virtnet_info *vi;
> > +
> > + if (!(xdp->rxq->dev->features & NETIF_F_RXHASH))
>
> I think 'vi->has_rss_hash_report' should be used here.
> NETIF_F_RXHASH cannot guarantee that the hash report feature is negotiated,
> and accessing hash_report and hash_value is unsafe at this time.
>

My understanding is that rxhash in dev->features is turned on only if
the corresponding hw feature is set. We will double check on this.

> > + return -ENODATA;
> > +
> > + vi = netdev_priv(xdp->rxq->dev);
> > + hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len);
>
> If the virtio-net-hdr is overrided by the XDP prog, how can this be done
> correctly and as expected?
>

Yeah, thanks for bringing up this concern. We are actively working on
a fix of this issue(possibly with a wrapper structure of xdp_buff).

Thanks,
Liang

> Thanks,
> Heng
>
> > +
> > + switch (__le16_to_cpu(hdr_hash->hash_report)) {
> > + case VIRTIO_NET_HASH_REPORT_TCPv4:
> > + *rss_type = XDP_RSS_TYPE_L4_IPV4_TCP;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_UDPv4:
> > + *rss_type = XDP_RSS_TYPE_L4_IPV4_UDP;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_TCPv6:
> > + *rss_type = XDP_RSS_TYPE_L4_IPV6_TCP;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_UDPv6:
> > + *rss_type = XDP_RSS_TYPE_L4_IPV6_UDP;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
> > + *rss_type = XDP_RSS_TYPE_L4_IPV6_TCP_EX;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
> > + *rss_type = XDP_RSS_TYPE_L4_IPV6_UDP_EX;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_IPv4:
> > + *rss_type = XDP_RSS_TYPE_L3_IPV4;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_IPv6:
> > + *rss_type = XDP_RSS_TYPE_L3_IPV6;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_IPv6_EX:
> > + *rss_type = XDP_RSS_TYPE_L3_IPV6_EX;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_NONE:
> > + default:
> > + *rss_type = XDP_RSS_TYPE_NONE;
> > + }
> > +
> > + *hash = __le32_to_cpu(hdr_hash->hash_value);
> > + return 0;
> > +}
> > +
> > +static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = {
> > + .xmo_rx_hash= virtnet_xdp_rx_hash,
> > +};
> > +
> >   static int virtnet_probe(struct virtio_device *vdev)
> >   {
> >   int i, err = -ENOMEM;
> > @@ -4613,6 +4667,8 @@ static int virtnet_probe(struct virtio_device *vdev)
> >   dev->ethtool_ops = &virtnet_ethtool_ops;
> >   SET_NETDEV_DEV(dev, &vdev->dev);
> >
> > + dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops;
> > +
> >   /* Do we support "hardware" checksums? */
> >   if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
> >   /* This opens up the world of extra features. */
>

Re: [PATCH] virtio_net: Support RX hash XDP hint

2024-01-23 Thread Jason Wang

On Mon, Jan 22, 2024 at 6:23 PM Liang Chen  wrote:
>
> The RSS hash report is a feature that's part of the virtio specification.
> Currently, virtio backends like qemu, vdpa (mlx5), and potentially vhost
> (still a work in progress as per [1]) support this feature. While the
> capability to obtain the RSS hash has been enabled in the normal path,
> it's currently missing in the XDP path. Therefore, we are introducing XDP
> hints through kfuncs to allow XDP programs to access the RSS hash.
>
> Signed-off-by: Liang Chen 
> ---
>  drivers/net/virtio_net.c | 56 
>  1 file changed, 56 insertions(+)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index d7ce4a1011ea..1463a4709e3c 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -4579,6 +4579,60 @@ static void virtnet_set_big_packets(struct 
> virtnet_info *vi, const int mtu)
> }
>  }
>
> +static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
> +  enum xdp_rss_hash_type *rss_type)
> +{
> +   const struct xdp_buff *xdp = (void *)_ctx;
> +   struct virtio_net_hdr_v1_hash *hdr_hash;
> +   struct virtnet_info *vi;
> +
> +   if (!(xdp->rxq->dev->features & NETIF_F_RXHASH))
> +   return -ENODATA;
> +
> +   vi = netdev_priv(xdp->rxq->dev);
> +   hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len);

Is there a guarantee that the hdr is not modified?

Thanks

Re: [PATCH] virtio_net: Support RX hash XDP hint

2024-01-23 Thread Liang Chen

On Tue, Jan 23, 2024 at 3:02 PM Michael S. Tsirkin  wrote:
>
> On Mon, Jan 22, 2024 at 06:22:56PM +0800, Liang Chen wrote:
> > The RSS hash report is a feature that's part of the virtio specification.
> > Currently, virtio backends like qemu, vdpa (mlx5), and potentially vhost
> > (still a work in progress as per [1]) support this feature. While the
> > capability to obtain the RSS hash has been enabled in the normal path,
> > it's currently missing in the XDP path. Therefore, we are introducing XDP
> > hints through kfuncs to allow XDP programs to access the RSS hash.
> >
> > Signed-off-by: Liang Chen 
> > ---
> >  drivers/net/virtio_net.c | 56 
> >  1 file changed, 56 insertions(+)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index d7ce4a1011ea..1463a4709e3c 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -4579,6 +4579,60 @@ static void virtnet_set_big_packets(struct 
> > virtnet_info *vi, const int mtu)
> >   }
> >  }
> >
> > +static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
> > +enum xdp_rss_hash_type *rss_type)
> > +{
> > + const struct xdp_buff *xdp = (void *)_ctx;
> > + struct virtio_net_hdr_v1_hash *hdr_hash;
> > + struct virtnet_info *vi;
> > +
> > + if (!(xdp->rxq->dev->features & NETIF_F_RXHASH))
> > + return -ENODATA;
> > +
> > + vi = netdev_priv(xdp->rxq->dev);
> > + hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len);
> > +
> > + switch (__le16_to_cpu(hdr_hash->hash_report)) {
> > + case VIRTIO_NET_HASH_REPORT_TCPv4:
> > + *rss_type = XDP_RSS_TYPE_L4_IPV4_TCP;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_UDPv4:
> > + *rss_type = XDP_RSS_TYPE_L4_IPV4_UDP;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_TCPv6:
> > + *rss_type = XDP_RSS_TYPE_L4_IPV6_TCP;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_UDPv6:
> > + *rss_type = XDP_RSS_TYPE_L4_IPV6_UDP;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
> > + *rss_type = XDP_RSS_TYPE_L4_IPV6_TCP_EX;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
> > + *rss_type = XDP_RSS_TYPE_L4_IPV6_UDP_EX;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_IPv4:
> > + *rss_type = XDP_RSS_TYPE_L3_IPV4;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_IPv6:
> > + *rss_type = XDP_RSS_TYPE_L3_IPV6;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_IPv6_EX:
> > + *rss_type = XDP_RSS_TYPE_L3_IPV6_EX;
> > + break;
> > + case VIRTIO_NET_HASH_REPORT_NONE:
> > + default:
> > + *rss_type = XDP_RSS_TYPE_NONE;
> > + }
> > +
> > + *hash = __le32_to_cpu(hdr_hash->hash_value);
> > + return 0;
> > +}
> > +
> > +static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = {
> > + .xmo_rx_hash= virtnet_xdp_rx_hash,
> > +};
> > +
> >  static int virtnet_probe(struct virtio_device *vdev)
> >  {
> >   int i, err = -ENOMEM;
> > @@ -4613,6 +4667,8 @@ static int virtnet_probe(struct virtio_device *vdev)
> >   dev->ethtool_ops = &virtnet_ethtool_ops;
> >   SET_NETDEV_DEV(dev, &vdev->dev);
> >
> > + dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops;
> > +
>
> How about making this assignment depend on
>
> xdp->rxq->dev->features & NETIF_F_RXHASH
>

Thanks! How about dev->hw_features & NETIF_F_RXHASH? dev->features &
NETIF_F_RXHASH has not been enabled at this point.

> ?
>
> >   /* Do we support "hardware" checksums? */
> >   if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
> >   /* This opens up the world of extra features. */
> > --
> > 2.40.1
>

Re: [PATCH -next v6 0/2] Make memory reclamation measurable

2024-01-23 Thread Bixuan Cui


ping~

在 2024/1/5 9:36, Bixuan Cui 写道:

When the system memory is low, kswapd reclaims the memory. The key steps
of memory reclamation include
1.shrink_lruvec
   * shrink_active_list, moves folios from the active LRU to the inactive LRU
   * shrink_inactive_list, shrink lru from inactive LRU list
2.shrink_slab
   * shrinker->count_objects(), calculates the freeable memory
   * shrinker->scan_objects(), reclaims the slab memory

The existing tracers in the vmscan are as follows:

--do_try_to_free_pages
--shrink_zones
--trace_mm_vmscan_node_reclaim_begin (tracer)
--shrink_node
--shrink_node_memcgs
   --trace_mm_vmscan_memcg_shrink_begin (tracer)
   --shrink_lruvec
 --shrink_list
   --shrink_active_list
  --trace_mm_vmscan_lru_shrink_active (tracer)
   --shrink_inactive_list
  --trace_mm_vmscan_lru_shrink_inactive (tracer)
 --shrink_active_list
   --shrink_slab
 --do_shrink_slab
 --shrinker->count_objects()
 --trace_mm_shrink_slab_start (tracer)
 --shrinker->scan_objects()
 --trace_mm_shrink_slab_end (tracer)
   --trace_mm_vmscan_memcg_shrink_end (tracer)
--trace_mm_vmscan_node_reclaim_end (tracer)

If we get the duration and quantity of shrink lru and slab,
then we can measure the memory recycling, as follows

Measuring memory reclamation with bpf:
   LRU FILE:
CPU COMMShrinkActive(us) ShrinkInactive(us)  Reclaim(page)
7   kswapd0 26  51  32
7   kswapd0 52  47  13
   SLAB:
CPU COMMOBJ_NAMECount_Dur(us) 
Freeable(page) Scan_Dur(us) Reclaim(page)
 1  kswapd0 super_cache_scan.cfi_jt 2   341 
   3225 128
 7  kswapd0 super_cache_scan.cfi_jt 0   
2247   8524 1024
 7  kswapd0 super_cache_scan.cfi_jt 23670   
   00

For this, add the new tracer to shrink_active_list/shrink_inactive_list
and shrinker->count_objects().

Changes:
v6: * Add Reviewed-by from Steven Rostedt.
v5: * Use 'DECLARE_EVENT_CLASS(mm_vmscan_lru_shrink_start_template' to
replace 'RACE_EVENT(mm_vmscan_lru_shrink_inactive/active_start'
 * Add the explanation for adding new shrink lru events into 'mm: vmscan: 
add new event to trace shrink lru'
v4: Add Reviewed-by and Changlog to every patch.
v3: Swap the positions of 'nid' and 'freeable' to prevent the hole in the trace 
event.
v2: Modify trace_mm_vmscan_lru_shrink_inactive() in evict_folios() at the same 
time to fix build error.

cuibixuan (2):
   mm: shrinker: add new event to trace shrink count
   mm: vmscan: add new event to trace shrink lru

  include/trace/events/vmscan.h | 80 ++-
  mm/shrinker.c |  4 ++
  mm/vmscan.c   | 11 +++--
  3 files changed, 90 insertions(+), 5 deletions(-)

Re: [PATCH v4 3/7] tracing/probes: support '%pd' type for print struct dentry's name

2024-01-23 Thread yebin (H)





On 2024/1/23 22:40, Masami Hiramatsu (Google) wrote:

On Tue, 23 Jan 2024 17:21:35 +0800
Ye Bin  wrote:


Similar to '%pd' for printk, use '%pd' for print struct dentry's name.

Signed-off-by: Ye Bin 
---
  kernel/trace/trace_kprobe.c | 6 ++
  kernel/trace/trace_probe.h  | 1 +
  2 files changed, 7 insertions(+)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c4c6e0e0068b..00b74530fbad 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -779,6 +779,7 @@ static int __trace_kprobe_create(int argc, const char 
*argv[])
char buf[MAX_EVENT_NAME_LEN];
char gbuf[MAX_EVENT_NAME_LEN];
char abuf[MAX_BTF_ARGS_LEN];
+   char dbuf[MAX_DENTRY_ARGS_LEN];

Hmm, no, I don't like to expand stack anymore. Please allocate it
from heap.
Do I need to change the other buffers on the stacks to allocate memory 
from heap?

struct traceprobe_parse_context ctx = { .flags = TPARG_FL_KERNEL };
  
  	switch (argv[0][0]) {

@@ -930,6 +931,11 @@ static int __trace_kprobe_create(int argc, const char 
*argv[])
argv = new_argv;
}
  
+	ret = traceprobe_expand_dentry_args(argc, argv, dbuf,

+   MAX_DENTRY_ARGS_LEN);
+   if (ret)
+   goto out;

And calling this here will not cover the trace_fprobe.

Could you call this from traceprobe_expand_meta_args() instead of
calling it directly from trace_kprobe? Then it can be used from
fprobe_event too.

Thank you,
At first I wanted to implement the extension logic in 
traceprobe_expand_meta_args(),
but I found that the code was difficult to understand when I started 
writing. If fprobe_event
wants to support this function, is traceprobe_expand_dentry_args() also 
called? Or re-encapsulate
an interface to include the logic of different extensions. In this way, 
the same buffer is used for
the entire extension process, and the extension function needs to return 
the information about

the length of the buffer.


+
/* setup a probe */
tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,
argc, is_return);
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 553371a4e0b1..d9c053824975 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -34,6 +34,7 @@
  #define MAX_ARRAY_LEN 64
  #define MAX_ARG_NAME_LEN  32
  #define MAX_BTF_ARGS_LEN  128
+#define MAX_DENTRY_ARGS_LEN256

Why do you think
I determined this value according to the extreme case that a parameter 
is expanded to occupy

64 bytes, and a maximum of four such large parameters are supported.

  #define MAX_STRING_SIZE   PATH_MAX
  #define MAX_ARG_BUF_LEN   (MAX_TRACE_ARGS * MAX_ARG_NAME_LEN)
  
--

2.31.1

Re: [PATCH v4 7/7] selftests/ftrace: add test cases for VFS type "%pd" and "%pD"

2024-01-23 Thread yebin (H)





On 2024/1/24 9:32, Masami Hiramatsu (Google) wrote:

On Tue, 23 Jan 2024 17:21:39 +0800
Ye Bin  wrote:


This patch adds test cases for new print format type "%pd/%pD".The test cases
test the following items:
1. Test README if add "%pd/%pD" type;
2. Test "%pd" type for dput();
3. Test "%pD" type for vfs_read();

Signed-off-by: Ye Bin 
---
  .../ftrace/test.d/kprobe/kprobe_args_vfs.tc   | 79 +++
  1 file changed, 79 insertions(+)
  create mode 100644 
tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc 
b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc
new file mode 100644
index ..1d8edd294dd6
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc
@@ -0,0 +1,79 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event VFS type argument
+# requires: kprobe_events
+
+case `uname -m` in
+x86_64)
+  ARG1=%di
+;;
+i[3456]86)
+  ARG1=%ax
+;;
+aarch64)
+  ARG1=%x0
+;;
+arm*)
+  ARG1=%r0
+;;
+ppc64*)
+  ARG1=%r3
+;;
+ppc*)
+  ARG1=%r3

You can merge this ppc* and ppc64* cases :)


+;;
+s390*)
+  ARG1=%r2
+;;
+mips*)
+  ARG1=%r4
+;;
+loongarch*)
+  ARG1=%r4
+;;
+riscv*)
+  ARG1=%a0

Anyway, I wonder why don't you use '$arg1' instead of these registers.
Is there any reason?

Thank you,
I looked at the parameter parsing code again, and using "$arg1" requires 
the kernel to

enable the CONFIG_HAVE_FUNCTION_ARG_ACCESS_API configuration.

+;;
+*)
+  echo "Please implement other architecture here"
+  exit_untested
+esac
+
+: "Test argument %pd/%pD in README"
+grep -q "%pd/%pD" README
+
+: "Test argument %pd with name"
+echo "p:testprobe dput name=${ARG1}:%pd" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1" events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "dput" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
+
+: "Test argument %pd without name"
+echo "p:testprobe dput ${ARG1}:%pd" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1" events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "dput" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
+
+: "Test argument %pD with name"
+echo "p:testprobe vfs_read name=${ARG1}:%pD" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1" events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "vfs_read" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
+
+: "Test argument %pD without name"
+echo "p:testprobe vfs_read ${ARG1}:%pD" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1"  events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "vfs_read" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
--
2.31.1

Re: [PATCH v7 04/15] x86/sgx: Implement basic EPC misc cgroup functionality

2024-01-23 Thread Haitao Huang

On Mon, 22 Jan 2024 14:25:53 -0600, Jarkko Sakkinen   
wrote:



On Mon Jan 22, 2024 at 7:20 PM EET, Haitao Huang wrote:

From: Kristen Carlson Accardi 

SGX Enclave Page Cache (EPC) memory allocations are separate from normal
RAM allocations, and are managed solely by the SGX subsystem. The
existing cgroup memory controller cannot be used to limit or account for
SGX EPC memory, which is a desirable feature in some environments.  For
example, in a Kubernates environment, a user can request certain EPC
quota for a pod but the orchestrator can not enforce the quota to limit
runtime EPC usage of the pod without an EPC cgroup controller.

Utilize the misc controller [admin-guide/cgroup-v2.rst, 5-9. Misc] to
limit and track EPC allocations per cgroup. Earlier patches have added
the "sgx_epc" resource type in the misc cgroup subsystem. Add basic
support in SGX driver as the "sgx_epc" resource provider:

- Set "capacity" of EPC by calling misc_cg_set_capacity()
- Update EPC usage counter, "current", by calling charge and uncharge
APIs for EPC allocation and deallocation, respectively.
- Setup sgx_epc resource type specific callbacks, which perform
initialization and cleanup during cgroup allocation and deallocation,
respectively.

With these changes, the misc cgroup controller enables user to set a  
hard

limit for EPC usage in the "misc.max" interface file. It reports current
usage in "misc.current", the total EPC memory available in
"misc.capacity", and the number of times EPC usage reached the max limit
in "misc.events".

For now, the EPC cgroup simply blocks additional EPC allocation in
sgx_alloc_epc_page() when the limit is reached. Reclaimable pages are
still tracked in the global active list, only reclaimed by the global
reclaimer when the total free page count is lower than a threshold.

Later patches will reorganize the tracking and reclamation code in the
global reclaimer and implement per-cgroup tracking and reclaiming.

Co-developed-by: Sean Christopherson 
Signed-off-by: Sean Christopherson 
Signed-off-by: Kristen Carlson Accardi 
Co-developed-by: Haitao Huang 
Signed-off-by: Haitao Huang 


For consistency sake I'd also add co-developed-by for Kristen. This is
at least the format suggested by kernel documentation.

She is the "From Author", so only Signed-off-by is needed for her  
according to the second example in the doc[1]?


Thanks
Haitao
[1]https://docs.kernel.org/process/submitting-patches.html#when-to-use-acked-by-cc-and-co-developed-by

Re: [PATCH] virtio_net: Support RX hash XDP hint

2024-01-23 Thread Xuan Zhuo

On Wed, 24 Jan 2024 10:04:51 +0800, Liang Chen  
wrote:
> On Mon, Jan 22, 2024 at 7:10 PM Heng Qi  wrote:
> >
> > Hi Liang Chen,
> >
> > 在 2024/1/22 下午6:22, Liang Chen 写道:
> > > The RSS hash report is a feature that's part of the virtio specification.
> > > Currently, virtio backends like qemu, vdpa (mlx5), and potentially vhost
> > > (still a work in progress as per [1]) support this feature. While the
> > > capability to obtain the RSS hash has been enabled in the normal path,
> > > it's currently missing in the XDP path. Therefore, we are introducing XDP
> > > hints through kfuncs to allow XDP programs to access the RSS hash.
> > >
> > > Signed-off-by: Liang Chen 
> > > ---
> > >   drivers/net/virtio_net.c | 56 
> > >   1 file changed, 56 insertions(+)
> > >
> > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > index d7ce4a1011ea..1463a4709e3c 100644
> > > --- a/drivers/net/virtio_net.c
> > > +++ b/drivers/net/virtio_net.c
> > > @@ -4579,6 +4579,60 @@ static void virtnet_set_big_packets(struct 
> > > virtnet_info *vi, const int mtu)
> > >   }
> > >   }
> > >
> > > +static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
> > > +enum xdp_rss_hash_type *rss_type)
> > > +{
> > > + const struct xdp_buff *xdp = (void *)_ctx;
> > > + struct virtio_net_hdr_v1_hash *hdr_hash;
> > > + struct virtnet_info *vi;
> > > +
> > > + if (!(xdp->rxq->dev->features & NETIF_F_RXHASH))
> >
> > I think 'vi->has_rss_hash_report' should be used here.
> > NETIF_F_RXHASH cannot guarantee that the hash report feature is negotiated,
> > and accessing hash_report and hash_value is unsafe at this time.
> >
>
> My understanding is that rxhash in dev->features is turned on only if
> the corresponding hw feature is set. We will double check on this.
>
> > > + return -ENODATA;
> > > +
> > > + vi = netdev_priv(xdp->rxq->dev);
> > > + hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - 
> > > vi->hdr_len);
> >
> > If the virtio-net-hdr is overrided by the XDP prog, how can this be done
> > correctly and as expected?
> >
>
> Yeah, thanks for bringing up this concern. We are actively working on
> a fix of this issue(possibly with a wrapper structure of xdp_buff).

Are there some places to save the hash before run xdp?

Thanks.


>
> Thanks,
> Liang
>
> > Thanks,
> > Heng
> >
> > > +
> > > + switch (__le16_to_cpu(hdr_hash->hash_report)) {
> > > + case VIRTIO_NET_HASH_REPORT_TCPv4:
> > > + *rss_type = XDP_RSS_TYPE_L4_IPV4_TCP;
> > > + break;
> > > + case VIRTIO_NET_HASH_REPORT_UDPv4:
> > > + *rss_type = XDP_RSS_TYPE_L4_IPV4_UDP;
> > > + break;
> > > + case VIRTIO_NET_HASH_REPORT_TCPv6:
> > > + *rss_type = XDP_RSS_TYPE_L4_IPV6_TCP;
> > > + break;
> > > + case VIRTIO_NET_HASH_REPORT_UDPv6:
> > > + *rss_type = XDP_RSS_TYPE_L4_IPV6_UDP;
> > > + break;
> > > + case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
> > > + *rss_type = XDP_RSS_TYPE_L4_IPV6_TCP_EX;
> > > + break;
> > > + case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
> > > + *rss_type = XDP_RSS_TYPE_L4_IPV6_UDP_EX;
> > > + break;
> > > + case VIRTIO_NET_HASH_REPORT_IPv4:
> > > + *rss_type = XDP_RSS_TYPE_L3_IPV4;
> > > + break;
> > > + case VIRTIO_NET_HASH_REPORT_IPv6:
> > > + *rss_type = XDP_RSS_TYPE_L3_IPV6;
> > > + break;
> > > + case VIRTIO_NET_HASH_REPORT_IPv6_EX:
> > > + *rss_type = XDP_RSS_TYPE_L3_IPV6_EX;
> > > + break;
> > > + case VIRTIO_NET_HASH_REPORT_NONE:
> > > + default:
> > > + *rss_type = XDP_RSS_TYPE_NONE;
> > > + }
> > > +
> > > + *hash = __le32_to_cpu(hdr_hash->hash_value);
> > > + return 0;
> > > +}
> > > +
> > > +static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = {
> > > + .xmo_rx_hash= virtnet_xdp_rx_hash,
> > > +};
> > > +
> > >   static int virtnet_probe(struct virtio_device *vdev)
> > >   {
> > >   int i, err = -ENOMEM;
> > > @@ -4613,6 +4667,8 @@ static int virtnet_probe(struct virtio_device *vdev)
> > >   dev->ethtool_ops = &virtnet_ethtool_ops;
> > >   SET_NETDEV_DEV(dev, &vdev->dev);
> > >
> > > + dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops;
> > > +
> > >   /* Do we support "hardware" checksums? */
> > >   if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
> > >   /* This opens up the world of extra features. */
> >
>

Re: [PATCH 1/3] dt-bindings: clock: gcc-msm8953: add reset for MDSS subsystem

2024-01-23 Thread Krzysztof Kozlowski

On 23/01/2024 22:03, Luca Weiss wrote:
> From: Vladimir Lypak 
> 
> Add a new define for the GCC_MDSS_BCR found on MSM8953.
> 
> Signed-off-by: Vladimir Lypak 
> [luca: expand commit message]
> Signed-off-by: Luca Weiss 

Acked-by: Krzysztof Kozlowski 

Best regards,
Krzysztof

84 matches

Mail list logo