Re: [PATCH net-next v2] vsock/test: add '--peer-port' input argument
On Tue, Jan 23, 2024 at 10:27:50AM +0300, Arseniy Krasnov wrote: Implement port for given CID as input argument instead of using hardcoded value '1234'. This allows to run different test instances on a single CID. Port argument is not required parameter and if it is not set, then default value will be '1234' - thus we preserve previous behaviour. Signed-off-by: Arseniy Krasnov --- Changelog: v1 -> v2: * Reword usage message. * Add commas after last field in 'opts' declaration. * 'RFC' -> 'net-next'. Thanks for the changes, LGTM! Reviewed-by: Stefano Garzarella tools/testing/vsock/util.c| 17 +++- tools/testing/vsock/util.h| 4 + tools/testing/vsock/vsock_diag_test.c | 21 +++-- tools/testing/vsock/vsock_test.c | 102 +- tools/testing/vsock/vsock_test_zerocopy.c | 12 +-- tools/testing/vsock/vsock_uring_test.c| 17 +++- 6 files changed, 115 insertions(+), 58 deletions(-) diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index ae2b33c21c45..554b290fefdc 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -33,8 +33,7 @@ void init_signals(void) signal(SIGPIPE, SIG_IGN); } -/* Parse a CID in string representation */ -unsigned int parse_cid(const char *str) +static unsigned int parse_uint(const char *str, const char *err_str) { char *endptr = NULL; unsigned long n; @@ -42,12 +41,24 @@ unsigned int parse_cid(const char *str) errno = 0; n = strtoul(str, &endptr, 10); if (errno || *endptr != '\0') { - fprintf(stderr, "malformed CID \"%s\"\n", str); + fprintf(stderr, "malformed %s \"%s\"\n", err_str, str); exit(EXIT_FAILURE); } return n; } +/* Parse a CID in string representation */ +unsigned int parse_cid(const char *str) +{ + return parse_uint(str, "CID"); +} + +/* Parse a port in string representation */ +unsigned int parse_port(const char *str) +{ + return parse_uint(str, "port"); +} + /* Wait for the remote to close the connection */ void vsock_wait_remote_close(int fd) { diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index 03c88d0cb861..e95e62485959 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -12,10 +12,13 @@ enum test_mode { TEST_MODE_SERVER }; +#define DEFAULT_PEER_PORT 1234 + /* Test runner options */ struct test_opts { enum test_mode mode; unsigned int peer_cid; + unsigned int peer_port; }; /* A test case definition. Test functions must print failures to stderr and @@ -35,6 +38,7 @@ struct test_case { void init_signals(void); unsigned int parse_cid(const char *str); +unsigned int parse_port(const char *str); int vsock_stream_connect(unsigned int cid, unsigned int port); int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type); diff --git a/tools/testing/vsock/vsock_diag_test.c b/tools/testing/vsock/vsock_diag_test.c index fa927ad16f8a..9d61b1f1c4c3 100644 --- a/tools/testing/vsock/vsock_diag_test.c +++ b/tools/testing/vsock/vsock_diag_test.c @@ -342,7 +342,7 @@ static void test_listen_socket_server(const struct test_opts *opts) } addr = { .svm = { .svm_family = AF_VSOCK, - .svm_port = 1234, + .svm_port = opts->peer_port, .svm_cid = VMADDR_CID_ANY, }, }; @@ -378,7 +378,7 @@ static void test_connect_client(const struct test_opts *opts) LIST_HEAD(sockets); struct vsock_stat *st; - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -403,7 +403,7 @@ static void test_connect_server(const struct test_opts *opts) LIST_HEAD(sockets); int client_fd; - client_fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + client_fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (client_fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -461,6 +461,11 @@ static const struct option longopts[] = { .has_arg = required_argument, .val = 'p', }, + { + .name = "peer-port", + .has_arg = required_argument, + .val = 'q', + }, { .name = "list", .has_arg = no_argument, @@ -481,7 +486,7 @@ static const struct option longopts[] = { static void usage(void) { - fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=] --control-port= --mode=client|server --peer-cid= [--list] [--skip=]\n" + fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=] --control-port= --mode=client
[PATCH v7 0/4] remoteproc: qcom: Introduce DSP support for SM8650
Add the bindings and driver changes for DSP support on the SM8650 platform in order to enable the aDSP, cDSP and MPSS subsystems to boot. Compared to SM8550, where SM8650 uses the same dual firmware files, (dtb file and main firmware) the memory zones requirement has changed: - cDSP: now requires 2 memory zones to be configured as shared between the cDSP and the HLOS subsystem - MPSS: In addition to the memory zone required for the SM8550 MPSS, two more are required to be configured for MPSS usage only. In order to handle this and avoid code duplication, the region_assign_* code patch has been made more generic and is able handle multiple DSP-only memory zones (for MPSS) or DSP-HLOS shared memory zones (cDSP) in the same region_assign functions. Dependencies: None For convenience, a regularly refreshed linux-next based git tree containing all the SM8650 related work is available at: https://git.codelinaro.org/neil.armstrong/linux/-/tree/topic/sm8650/upstream/integ Signed-off-by: Neil Armstrong --- Changes in v7: - Rebased on v6.8-rc1 - Add another memory region for MPSS, in bindings, code and DT - Kepts Krzysztof's review on bindings after agreement on irc - Kept drivers patches reviews because it's only a miminal change (value 2 -> 3) - Link to v6: https://lore.kernel.org/r/20231218-topic-sm8650-upstream-remoteproc-v6-0-3d16b37f1...@linaro.org Changes in v6: - Rebased on next-20231218, last patch did not apply anymore - Link to v5: https://lore.kernel.org/r/20231212-topic-sm8650-upstream-remoteproc-v5-0-e749a1a48...@linaro.org Changes in v5: - Rename _perms to _owners per Konrad suggestion - Link to v4: https://lore.kernel.org/r/20231208-topic-sm8650-upstream-remoteproc-v4-0-a96c3e5f0...@linaro.org Changes in v4: - Collected review from Mukesh Ojha - Fixed adsp_unassign_memory_region() as suggested by Mukesh Ojha - Link to v3: https://lore.kernel.org/r/20231106-topic-sm8650-upstream-remoteproc-v3-0-dbd4cabae...@linaro.org Changes in v3: - Collected bindings review tags - Small fixes suggested by Mukesh Ojha - Link to v2: https://lore.kernel.org/r/20231030-topic-sm8650-upstream-remoteproc-v2-0-609ee572e...@linaro.org Changes in v2: - Fixed sm8650 entries in allOf:if:then to match Krzysztof's comments - Collected reviewed-by on patch 3 - Link to v1: https://lore.kernel.org/r/20231025-topic-sm8650-upstream-remoteproc-v1-0-a8d20e4ce...@linaro.org --- Neil Armstrong (4): dt-bindings: remoteproc: qcom,sm8550-pas: document the SM8650 PAS remoteproc: qcom: pas: make region assign more generic remoteproc: qcom: pas: Add SM8650 remoteproc support arm64: dts: qcom: sm8650: add missing qlink_logging reserved memory for mpss .../bindings/remoteproc/qcom,sm8550-pas.yaml | 45 ++- arch/arm64/boot/dts/qcom/sm8650.dtsi | 8 +- drivers/remoteproc/qcom_q6v5_pas.c | 150 - 3 files changed, 167 insertions(+), 36 deletions(-) --- base-commit: 6613476e225e090cc9aad49be7fa504e290dd33d change-id: 20231016-topic-sm8650-upstream-remoteproc-66a87eeb6fee Best regards, -- Neil Armstrong
[PATCH v7 1/4] dt-bindings: remoteproc: qcom,sm8550-pas: document the SM8650 PAS
Document the DSP Peripheral Authentication Service on the SM8650 Platform. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Neil Armstrong --- .../bindings/remoteproc/qcom,sm8550-pas.yaml | 45 +- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,sm8550-pas.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,sm8550-pas.yaml index 58120829fb06..5f63b6b9a8f5 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,sm8550-pas.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,sm8550-pas.yaml @@ -19,6 +19,9 @@ properties: - qcom,sm8550-adsp-pas - qcom,sm8550-cdsp-pas - qcom,sm8550-mpss-pas + - qcom,sm8650-adsp-pas + - qcom,sm8650-cdsp-pas + - qcom,sm8650-mpss-pas reg: maxItems: 1 @@ -49,6 +52,8 @@ properties: - description: Memory region for main Firmware authentication - description: Memory region for Devicetree Firmware authentication - description: DSM Memory region + - description: DSM Memory region 2 + - description: Memory region for Qlink Logging required: - compatible @@ -63,6 +68,7 @@ allOf: enum: - qcom,sm8550-adsp-pas - qcom,sm8550-cdsp-pas +- qcom,sm8650-adsp-pas then: properties: interrupts: @@ -71,7 +77,26 @@ allOf: maxItems: 5 memory-region: maxItems: 2 -else: + - if: + properties: +compatible: + enum: +- qcom,sm8650-cdsp-pas +then: + properties: +interrupts: + maxItems: 5 +interrupt-names: + maxItems: 5 +memory-region: + minItems: 3 + maxItems: 3 + - if: + properties: +compatible: + enum: +- qcom,sm8550-mpss-pas +then: properties: interrupts: minItems: 6 @@ -79,12 +104,28 @@ allOf: minItems: 6 memory-region: minItems: 3 + maxItems: 3 + - if: + properties: +compatible: + enum: +- qcom,sm8650-mpss-pas +then: + properties: +interrupts: + minItems: 6 +interrupt-names: + minItems: 6 +memory-region: + minItems: 5 + maxItems: 5 - if: properties: compatible: enum: - qcom,sm8550-adsp-pas +- qcom,sm8650-adsp-pas then: properties: power-domains: @@ -101,6 +142,7 @@ allOf: compatible: enum: - qcom,sm8550-mpss-pas +- qcom,sm8650-mpss-pas then: properties: power-domains: @@ -116,6 +158,7 @@ allOf: compatible: enum: - qcom,sm8550-cdsp-pas +- qcom,sm8650-cdsp-pas then: properties: power-domains: -- 2.34.1
[PATCH v7 2/4] remoteproc: qcom: pas: make region assign more generic
The current memory region assign only supports a single memory region. But new platforms introduces more regions to make the memory requirements more flexible for various use cases. Those new platforms also shares the memory region between the DSP and HLOS. To handle this, make the region assign more generic in order to support more than a single memory region and also permit setting the regions permissions as shared. Reviewed-by: Mukesh Ojha Signed-off-by: Neil Armstrong --- drivers/remoteproc/qcom_q6v5_pas.c | 100 - 1 file changed, 66 insertions(+), 34 deletions(-) diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c index a9dd58608052..09e8ad9f08c4 100644 --- a/drivers/remoteproc/qcom_q6v5_pas.c +++ b/drivers/remoteproc/qcom_q6v5_pas.c @@ -33,6 +33,8 @@ #define ADSP_DECRYPT_SHUTDOWN_DELAY_MS 100 +#define MAX_ASSIGN_COUNT 3 + struct adsp_data { int crash_reason_smem; const char *firmware_name; @@ -51,6 +53,9 @@ struct adsp_data { int ssctl_id; int region_assign_idx; + int region_assign_count; + bool region_assign_shared; + int region_assign_vmid; }; struct qcom_adsp { @@ -87,15 +92,18 @@ struct qcom_adsp { phys_addr_t dtb_mem_phys; phys_addr_t mem_reloc; phys_addr_t dtb_mem_reloc; - phys_addr_t region_assign_phys; + phys_addr_t region_assign_phys[MAX_ASSIGN_COUNT]; void *mem_region; void *dtb_mem_region; size_t mem_size; size_t dtb_mem_size; - size_t region_assign_size; + size_t region_assign_size[MAX_ASSIGN_COUNT]; int region_assign_idx; - u64 region_assign_perms; + int region_assign_count; + bool region_assign_shared; + int region_assign_vmid; + u64 region_assign_owners[MAX_ASSIGN_COUNT]; struct qcom_rproc_glink glink_subdev; struct qcom_rproc_subdev smd_subdev; @@ -590,37 +598,53 @@ static int adsp_alloc_memory_region(struct qcom_adsp *adsp) static int adsp_assign_memory_region(struct qcom_adsp *adsp) { - struct reserved_mem *rmem = NULL; - struct qcom_scm_vmperm perm; + struct qcom_scm_vmperm perm[MAX_ASSIGN_COUNT]; struct device_node *node; + unsigned int perm_size; + int offset; int ret; if (!adsp->region_assign_idx) return 0; - node = of_parse_phandle(adsp->dev->of_node, "memory-region", adsp->region_assign_idx); - if (node) - rmem = of_reserved_mem_lookup(node); - of_node_put(node); - if (!rmem) { - dev_err(adsp->dev, "unable to resolve shareable memory-region\n"); - return -EINVAL; - } + for (offset = 0; offset < adsp->region_assign_count; ++offset) { + struct reserved_mem *rmem = NULL; + + node = of_parse_phandle(adsp->dev->of_node, "memory-region", + adsp->region_assign_idx + offset); + if (node) + rmem = of_reserved_mem_lookup(node); + of_node_put(node); + if (!rmem) { + dev_err(adsp->dev, "unable to resolve shareable memory-region index %d\n", + offset); + return -EINVAL; + } - perm.vmid = QCOM_SCM_VMID_MSS_MSA; - perm.perm = QCOM_SCM_PERM_RW; + if (adsp->region_assign_shared) { + perm[0].vmid = QCOM_SCM_VMID_HLOS; + perm[0].perm = QCOM_SCM_PERM_RW; + perm[1].vmid = adsp->region_assign_vmid; + perm[1].perm = QCOM_SCM_PERM_RW; + perm_size = 2; + } else { + perm[0].vmid = adsp->region_assign_vmid; + perm[0].perm = QCOM_SCM_PERM_RW; + perm_size = 1; + } - adsp->region_assign_phys = rmem->base; - adsp->region_assign_size = rmem->size; - adsp->region_assign_perms = BIT(QCOM_SCM_VMID_HLOS); + adsp->region_assign_phys[offset] = rmem->base; + adsp->region_assign_size[offset] = rmem->size; + adsp->region_assign_owners[offset] = BIT(QCOM_SCM_VMID_HLOS); - ret = qcom_scm_assign_mem(adsp->region_assign_phys, - adsp->region_assign_size, - &adsp->region_assign_perms, - &perm, 1); - if (ret < 0) { - dev_err(adsp->dev, "assign memory failed\n"); - return ret; + ret = qcom_scm_assign_mem(adsp->region_assign_phys[offset], + adsp->region_assign_size[offset], + &adsp->region_assign_owners[offset], + perm, perm_si
[PATCH v7 3/4] remoteproc: qcom: pas: Add SM8650 remoteproc support
Add DSP Peripheral Authentication Service support for the SM8650 platform. Reviewed-by: Dmitry Baryshkov Signed-off-by: Neil Armstrong --- drivers/remoteproc/qcom_q6v5_pas.c | 50 ++ 1 file changed, 50 insertions(+) diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c index 09e8ad9f08c4..d0b1f0f38347 100644 --- a/drivers/remoteproc/qcom_q6v5_pas.c +++ b/drivers/remoteproc/qcom_q6v5_pas.c @@ -1213,6 +1213,53 @@ static const struct adsp_data sc7280_wpss_resource = { .ssctl_id = 0x19, }; +static const struct adsp_data sm8650_cdsp_resource = { + .crash_reason_smem = 601, + .firmware_name = "cdsp.mdt", + .dtb_firmware_name = "cdsp_dtb.mdt", + .pas_id = 18, + .dtb_pas_id = 0x25, + .minidump_id = 7, + .auto_boot = true, + .proxy_pd_names = (char*[]){ + "cx", + "mxc", + "nsp", + NULL + }, + .load_state = "cdsp", + .ssr_name = "cdsp", + .sysmon_name = "cdsp", + .ssctl_id = 0x17, + .region_assign_idx = 2, + .region_assign_count = 1, + .region_assign_shared = true, + .region_assign_vmid = QCOM_SCM_VMID_CDSP, +}; + +static const struct adsp_data sm8650_mpss_resource = { + .crash_reason_smem = 421, + .firmware_name = "modem.mdt", + .dtb_firmware_name = "modem_dtb.mdt", + .pas_id = 4, + .dtb_pas_id = 0x26, + .minidump_id = 3, + .auto_boot = false, + .decrypt_shutdown = true, + .proxy_pd_names = (char*[]){ + "cx", + "mss", + NULL + }, + .load_state = "modem", + .ssr_name = "mpss", + .sysmon_name = "modem", + .ssctl_id = 0x12, + .region_assign_idx = 2, + .region_assign_count = 3, + .region_assign_vmid = QCOM_SCM_VMID_MSS_MSA, +}; + static const struct of_device_id adsp_of_match[] = { { .compatible = "qcom,msm8226-adsp-pil", .data = &adsp_resource_init}, { .compatible = "qcom,msm8953-adsp-pil", .data = &msm8996_adsp_resource}, @@ -1268,6 +1315,9 @@ static const struct of_device_id adsp_of_match[] = { { .compatible = "qcom,sm8550-adsp-pas", .data = &sm8550_adsp_resource}, { .compatible = "qcom,sm8550-cdsp-pas", .data = &sm8550_cdsp_resource}, { .compatible = "qcom,sm8550-mpss-pas", .data = &sm8550_mpss_resource}, + { .compatible = "qcom,sm8650-adsp-pas", .data = &sm8550_adsp_resource}, + { .compatible = "qcom,sm8650-cdsp-pas", .data = &sm8650_cdsp_resource}, + { .compatible = "qcom,sm8650-mpss-pas", .data = &sm8650_mpss_resource}, { }, }; MODULE_DEVICE_TABLE(of, adsp_of_match); -- 2.34.1
[PATCH v7 4/4] arm64: dts: qcom: sm8650: add missing qlink_logging reserved memory for mpss
The qlink_logging memory region is also used by the modem firmware, add it to the reserved memories and add it to the MPSS memory regions. Signed-off-by: Neil Armstrong --- arch/arm64/boot/dts/qcom/sm8650.dtsi | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8650.dtsi b/arch/arm64/boot/dts/qcom/sm8650.dtsi index 2df77123a8c7..7a1cbc823306 100644 --- a/arch/arm64/boot/dts/qcom/sm8650.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8650.dtsi @@ -525,6 +525,11 @@ qdss_mem: qdss@8280 { no-map; }; + qlink_logging_mem: qlink-logging@8480 { + reg = <0 0x8480 0 0x20>; + no-map; + }; + mpss_dsm_mem: mpss-dsm@86b0 { reg = <0 0x86b0 0 0x490>; no-map; @@ -2627,7 +2632,8 @@ remoteproc_mpss: remoteproc@408 { "mss"; memory-region = <&mpss_mem>, <&q6_mpss_dtb_mem>, - <&mpss_dsm_mem>, <&mpss_dsm_mem_2>; + <&mpss_dsm_mem>, <&mpss_dsm_mem_2>, + <&qlink_logging_mem>; qcom,qmp = <&aoss_qmp>; -- 2.34.1
[PATCH v4 0/7] support '%pd' and '%pD' for print file name
During fault locating, the file name needs to be printed based on the dentry/file address. The offset needs to be calculated each time, which is troublesome. Similar to printk, kprobe supports printing file names for dentry/file addresses. Diff v4 vs v3: 1. Use "argv[i][idx + 3] == 'd'" instead of "argv[i][strlen(argv[i]) - 1] == 'd'" to judge print format in PATCH[4/7]; Diff v3 vs v2: 1. Return the index of where the suffix was found in str_has_suffix(); Diff v2 vs v1: 1. Use "%pd/%pD" print format instead of "pd/pD" print format; 2. Add "%pd/%pD" in README; 3. Expand "%pd/%pD" argument before parameter parsing; 4. Add more detail information in ftrace documentation; 5. Add test cases for new print format in selftests/ftrace; Ye Bin (7): string.h: add str_has_suffix() helper for test string ends with specify string tracing/probes: add traceprobe_expand_dentry_args() helper tracing/probes: support '%pd' type for print struct dentry's name tracing/probes: support '%pD' type for print struct file's name tracing: add new type "%pd/%pD" in readme_msg[] Documentation: tracing: add new type '%pd' and '%pD' for kprobe selftests/ftrace: add test cases for VFS type "%pd" and "%pD" Documentation/trace/kprobetrace.rst | 6 +- include/linux/string.h| 28 +++ kernel/trace/trace.c | 2 +- kernel/trace/trace_kprobe.c | 6 ++ kernel/trace/trace_probe.c| 47 +++ kernel/trace/trace_probe.h| 3 + .../ftrace/test.d/kprobe/kprobe_args_vfs.tc | 79 +++ 7 files changed, 169 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc -- 2.31.1
[PATCH v4 5/7] tracing: add new type "%pd/%pD" in readme_msg[]
Signed-off-by: Ye Bin --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2a7c6fd934e9..13197d3b86bd 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5745,7 +5745,7 @@ static const char readme_msg[] = "\t +|-[u](), \\imm-value, \\\"imm-string\"\n" "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n" "\t b@/, ustring,\n" - "\t symstr, \\[\\]\n" + "\t symstr, %pd/%pD \\[\\]\n" #ifdef CONFIG_HIST_TRIGGERS "\tfield: ;\n" "\tstype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n" -- 2.31.1
[PATCH v4 4/7] tracing/probes: support '%pD' type for print struct file's name
Similar to '%pD' for printk, use '%pD' for print struct file's name. Signed-off-by: Ye Bin --- kernel/trace/trace_probe.c | 41 -- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index cc8bd7ea5341..a664633137a0 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -12,6 +12,7 @@ #define pr_fmt(fmt)"trace_probe: " fmt #include +#include #include "trace_btf.h" #include "trace_probe.h" @@ -1574,28 +1575,38 @@ int traceprobe_expand_dentry_args(int argc, const char *argv[], char *buf, for (i = 0; i < argc; i++) { size_t idx; - if (str_has_suffix(argv[i], ":%pd", &idx)) { - char *tmp = kstrdup(argv[i], GFP_KERNEL); - char *equal; + if (!str_has_suffix(argv[i], ":%pd", &idx) && + !str_has_suffix(argv[i], ":%pD", &idx)) + continue; - if (!tmp) - return -ENOMEM; + char *tmp = kstrdup(argv[i], GFP_KERNEL); + char *equal; + + if (!tmp) + return -ENOMEM; - equal = strchr(tmp, '='); - if (equal) - *equal = '\0'; - tmp[idx] = '\0'; + equal = strchr(tmp, '='); + if (equal) + *equal = '\0'; + tmp[idx] = '\0'; + if (argv[i][idx + 3] == 'd') ret = snprintf(buf + used, bufsize - used, "%s%s+0x0(+0x%zx(%s)):string", equal ? tmp : "", equal ? "=" : "", offsetof(struct dentry, d_name.name), equal ? equal + 1 : tmp); - kfree(tmp); - if (ret >= bufsize - used) - return -ENOMEM; - argv[i] = buf + used; - used += ret + 1; - } + else + ret = snprintf(buf + used, bufsize - used, + "%s%s+0x0(+0x%zx(+0x%zx(%s))):string", + equal ? tmp : "", equal ? "=" : "", + offsetof(struct dentry, d_name.name), + offsetof(struct file, f_path.dentry), + equal ? equal + 1 : tmp); + kfree(tmp); + if (ret >= bufsize - used) + return -ENOMEM; + argv[i] = buf + used; + used += ret + 1; } return 0; -- 2.31.1
[PATCH v4 1/7] string.h: add str_has_suffix() helper for test string ends with specify string
str_has_suffix() is test string if ends with specify string, and also this API may return the index of where the suffix was found. Signed-off-by: Ye Bin --- include/linux/string.h | 28 1 file changed, 28 insertions(+) diff --git a/include/linux/string.h b/include/linux/string.h index 433c207a01da..2fb0f22237fe 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -405,4 +405,32 @@ static __always_inline size_t str_has_prefix(const char *str, const char *prefix return strncmp(str, prefix, len) == 0 ? len : 0; } +/** + * str_has_suffix - Test if a string has a given suffix + * @str: The string to test + * @suffix: The string to see if @str ends with + * @index: The index into @str of where @suffix is if found (NULL to ignore) + * + * Returns: + * * strlen(@suffix) if @str ends with @suffix + * * 0 if @str does not end with @suffix + */ +static __always_inline size_t str_has_suffix(const char *str, const char *suffix, +size_t *index) +{ + size_t len = strlen(suffix); + size_t str_len = strlen(str); + + if (len > str_len) + return 0; + + if (strncmp(str + str_len - len, suffix, len)) + return 0; + + if (index) + *index = str_len - len; + + return len; +} + #endif /* _LINUX_STRING_H_ */ -- 2.31.1
[PATCH v4 3/7] tracing/probes: support '%pd' type for print struct dentry's name
Similar to '%pd' for printk, use '%pd' for print struct dentry's name. Signed-off-by: Ye Bin --- kernel/trace/trace_kprobe.c | 6 ++ kernel/trace/trace_probe.h | 1 + 2 files changed, 7 insertions(+) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index c4c6e0e0068b..00b74530fbad 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -779,6 +779,7 @@ static int __trace_kprobe_create(int argc, const char *argv[]) char buf[MAX_EVENT_NAME_LEN]; char gbuf[MAX_EVENT_NAME_LEN]; char abuf[MAX_BTF_ARGS_LEN]; + char dbuf[MAX_DENTRY_ARGS_LEN]; struct traceprobe_parse_context ctx = { .flags = TPARG_FL_KERNEL }; switch (argv[0][0]) { @@ -930,6 +931,11 @@ static int __trace_kprobe_create(int argc, const char *argv[]) argv = new_argv; } + ret = traceprobe_expand_dentry_args(argc, argv, dbuf, + MAX_DENTRY_ARGS_LEN); + if (ret) + goto out; + /* setup a probe */ tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive, argc, is_return); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 553371a4e0b1..d9c053824975 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -34,6 +34,7 @@ #define MAX_ARRAY_LEN 64 #define MAX_ARG_NAME_LEN 32 #define MAX_BTF_ARGS_LEN 128 +#define MAX_DENTRY_ARGS_LEN256 #define MAX_STRING_SIZEPATH_MAX #define MAX_ARG_BUF_LEN(MAX_TRACE_ARGS * MAX_ARG_NAME_LEN) -- 2.31.1
[PATCH v4 2/7] tracing/probes: add traceprobe_expand_dentry_args() helper
Add traceprobe_expand_dentry_args() to expand dentry args. this API is prepare to support "%pd" print format for kprobe. Signed-off-by: Ye Bin --- kernel/trace/trace_probe.c | 36 kernel/trace/trace_probe.h | 2 ++ 2 files changed, 38 insertions(+) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 4dc74d73fc1d..cc8bd7ea5341 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -1565,6 +1565,42 @@ const char **traceprobe_expand_meta_args(int argc, const char *argv[], return ERR_PTR(ret); } +int traceprobe_expand_dentry_args(int argc, const char *argv[], char *buf, + int bufsize) +{ + int i, used, ret; + + used = 0; + for (i = 0; i < argc; i++) { + size_t idx; + + if (str_has_suffix(argv[i], ":%pd", &idx)) { + char *tmp = kstrdup(argv[i], GFP_KERNEL); + char *equal; + + if (!tmp) + return -ENOMEM; + + equal = strchr(tmp, '='); + if (equal) + *equal = '\0'; + tmp[idx] = '\0'; + ret = snprintf(buf + used, bufsize - used, + "%s%s+0x0(+0x%zx(%s)):string", + equal ? tmp : "", equal ? "=" : "", + offsetof(struct dentry, d_name.name), + equal ? equal + 1 : tmp); + kfree(tmp); + if (ret >= bufsize - used) + return -ENOMEM; + argv[i] = buf + used; + used += ret + 1; + } + } + + return 0; +} + void traceprobe_finish_parse(struct traceprobe_parse_context *ctx) { clear_btf_context(ctx); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 850d9ecb6765..553371a4e0b1 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -402,6 +402,8 @@ extern int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, const char **traceprobe_expand_meta_args(int argc, const char *argv[], int *new_argc, char *buf, int bufsize, struct traceprobe_parse_context *ctx); +extern int traceprobe_expand_dentry_args(int argc, const char *argv[], char *buf, +int bufsize); extern int traceprobe_update_arg(struct probe_arg *arg); extern void traceprobe_free_probe_arg(struct probe_arg *arg); -- 2.31.1
[PATCH v4 6/7] Documentation: tracing: add new type '%pd' and '%pD' for kprobe
Similar to printk() '%pd' is for fetch dentry's name from struct dentry's pointer, and '%pD' is for fetch file's name from struct file's pointer. Signed-off-by: Ye Bin --- Documentation/trace/kprobetrace.rst | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst index bf9cecb69fc9..a1d12d65a8dc 100644 --- a/Documentation/trace/kprobetrace.rst +++ b/Documentation/trace/kprobetrace.rst @@ -58,7 +58,8 @@ Synopsis of kprobe_events NAME=FETCHARG : Set NAME as the argument name of FETCHARG. FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types - (x8/x16/x32/x64), "char", "string", "ustring", "symbol", "symstr" + (x8/x16/x32/x64), VFS layer common type(%pd/%pD) for print + file name, "char", "string", "ustring", "symbol", "symstr" and bitfield are supported. (\*1) only for the probe on function entry (offs == 0). Note, this argument access @@ -113,6 +114,9 @@ With 'symstr' type, you can filter the event with wildcard pattern of the symbols, and you don't need to solve symbol name by yourself. For $comm, the default type is "string"; any other type is invalid. +VFS layer common type(%pd/%pD) is a special type, which fetches dentry's or +file's name from struct dentry's pointer or struct file's pointer. + .. _user_mem_access: User Memory Access -- 2.31.1
[PATCH v4 7/7] selftests/ftrace: add test cases for VFS type "%pd" and "%pD"
This patch adds test cases for new print format type "%pd/%pD".The test cases test the following items: 1. Test README if add "%pd/%pD" type; 2. Test "%pd" type for dput(); 3. Test "%pD" type for vfs_read(); Signed-off-by: Ye Bin --- .../ftrace/test.d/kprobe/kprobe_args_vfs.tc | 79 +++ 1 file changed, 79 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc new file mode 100644 index ..1d8edd294dd6 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc @@ -0,0 +1,79 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Kprobe event VFS type argument +# requires: kprobe_events + +case `uname -m` in +x86_64) + ARG1=%di +;; +i[3456]86) + ARG1=%ax +;; +aarch64) + ARG1=%x0 +;; +arm*) + ARG1=%r0 +;; +ppc64*) + ARG1=%r3 +;; +ppc*) + ARG1=%r3 +;; +s390*) + ARG1=%r2 +;; +mips*) + ARG1=%r4 +;; +loongarch*) + ARG1=%r4 +;; +riscv*) + ARG1=%a0 +;; +*) + echo "Please implement other architecture here" + exit_untested +esac + +: "Test argument %pd/%pD in README" +grep -q "%pd/%pD" README + +: "Test argument %pd with name" +echo "p:testprobe dput name=${ARG1}:%pd" > kprobe_events +echo 1 > events/kprobes/testprobe/enable +grep -q "1" events/kprobes/testprobe/enable +echo 0 > events/kprobes/testprobe/enable +grep "dput" trace | grep -q "enable" +echo "" > kprobe_events +echo "" > trace + +: "Test argument %pd without name" +echo "p:testprobe dput ${ARG1}:%pd" > kprobe_events +echo 1 > events/kprobes/testprobe/enable +grep -q "1" events/kprobes/testprobe/enable +echo 0 > events/kprobes/testprobe/enable +grep "dput" trace | grep -q "enable" +echo "" > kprobe_events +echo "" > trace + +: "Test argument %pD with name" +echo "p:testprobe vfs_read name=${ARG1}:%pD" > kprobe_events +echo 1 > events/kprobes/testprobe/enable +grep -q "1" events/kprobes/testprobe/enable +echo 0 > events/kprobes/testprobe/enable +grep "vfs_read" trace | grep -q "enable" +echo "" > kprobe_events +echo "" > trace + +: "Test argument %pD without name" +echo "p:testprobe vfs_read ${ARG1}:%pD" > kprobe_events +echo 1 > events/kprobes/testprobe/enable +grep -q "1" events/kprobes/testprobe/enable +echo 0 > events/kprobes/testprobe/enable +grep "vfs_read" trace | grep -q "enable" +echo "" > kprobe_events +echo "" > trace -- 2.31.1
Re: [PATCH v7 3/4] remoteproc: qcom: pas: Add SM8650 remoteproc support
On 1/23/2024 2:21 PM, Neil Armstrong wrote: Add DSP Peripheral Authentication Service support for the SM8650 platform. Reviewed-by: Dmitry Baryshkov Signed-off-by: Neil Armstrong --- drivers/remoteproc/qcom_q6v5_pas.c | 50 ++ 1 file changed, 50 insertions(+) diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c index 09e8ad9f08c4..d0b1f0f38347 100644 --- a/drivers/remoteproc/qcom_q6v5_pas.c +++ b/drivers/remoteproc/qcom_q6v5_pas.c @@ -1213,6 +1213,53 @@ static const struct adsp_data sc7280_wpss_resource = { .ssctl_id = 0x19, }; +static const struct adsp_data sm8650_cdsp_resource = { + .crash_reason_smem = 601, + .firmware_name = "cdsp.mdt", + .dtb_firmware_name = "cdsp_dtb.mdt", + .pas_id = 18, + .dtb_pas_id = 0x25, + .minidump_id = 7, + .auto_boot = true, + .proxy_pd_names = (char*[]){ + "cx", + "mxc", + "nsp", + NULL + }, + .load_state = "cdsp", + .ssr_name = "cdsp", + .sysmon_name = "cdsp", + .ssctl_id = 0x17, + .region_assign_idx = 2, + .region_assign_count = 1, + .region_assign_shared = true, + .region_assign_vmid = QCOM_SCM_VMID_CDSP, +}; + +static const struct adsp_data sm8650_mpss_resource = { + .crash_reason_smem = 421, + .firmware_name = "modem.mdt", + .dtb_firmware_name = "modem_dtb.mdt", + .pas_id = 4, + .dtb_pas_id = 0x26, + .minidump_id = 3, + .auto_boot = false, + .decrypt_shutdown = true, + .proxy_pd_names = (char*[]){ + "cx", + "mss", + NULL + }, + .load_state = "modem", + .ssr_name = "mpss", + .sysmon_name = "modem", + .ssctl_id = 0x12, + .region_assign_idx = 2, + .region_assign_count = 3, I see this has changed from 2 to 3 after qlink logging addition; + .region_assign_vmid = QCOM_SCM_VMID_MSS_MSA, +}; + static const struct of_device_id adsp_of_match[] = { { .compatible = "qcom,msm8226-adsp-pil", .data = &adsp_resource_init}, { .compatible = "qcom,msm8953-adsp-pil", .data = &msm8996_adsp_resource}, @@ -1268,6 +1315,9 @@ static const struct of_device_id adsp_of_match[] = { { .compatible = "qcom,sm8550-adsp-pas", .data = &sm8550_adsp_resource}, { .compatible = "qcom,sm8550-cdsp-pas", .data = &sm8550_cdsp_resource}, { .compatible = "qcom,sm8550-mpss-pas", .data = &sm8550_mpss_resource}, + { .compatible = "qcom,sm8650-adsp-pas", .data = &sm8550_adsp_resource}, Same as sm8550; + { .compatible = "qcom,sm8650-cdsp-pas", .data = &sm8650_cdsp_resource}, + { .compatible = "qcom,sm8650-mpss-pas", .data = &sm8650_mpss_resource}, LGTM, Acked-by: Mukesh Ojha -Mukesh { }, }; MODULE_DEVICE_TABLE(of, adsp_of_match);
Re: [PATCH v7 4/4] arm64: dts: qcom: sm8650: add missing qlink_logging reserved memory for mpss
On 1/23/2024 2:21 PM, Neil Armstrong wrote: The qlink_logging memory region is also used by the modem firmware, add it to the reserved memories and add it to the MPSS memory regions. Signed-off-by: Neil Armstrong LGTM, Reviewed-by: Mukesh Ojha -Mukesh --- arch/arm64/boot/dts/qcom/sm8650.dtsi | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8650.dtsi b/arch/arm64/boot/dts/qcom/sm8650.dtsi index 2df77123a8c7..7a1cbc823306 100644 --- a/arch/arm64/boot/dts/qcom/sm8650.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8650.dtsi @@ -525,6 +525,11 @@ qdss_mem: qdss@8280 { no-map; }; + qlink_logging_mem: qlink-logging@8480 { + reg = <0 0x8480 0 0x20>; + no-map; + }; + mpss_dsm_mem: mpss-dsm@86b0 { reg = <0 0x86b0 0 0x490>; no-map; @@ -2627,7 +2632,8 @@ remoteproc_mpss: remoteproc@408 { "mss"; memory-region = <&mpss_mem>, <&q6_mpss_dtb_mem>, - <&mpss_dsm_mem>, <&mpss_dsm_mem_2>; + <&mpss_dsm_mem>, <&mpss_dsm_mem_2>, + <&qlink_logging_mem>; qcom,qmp = <&aoss_qmp>;
[PATCH net-next v3 2/5] page_frag: unify gfp bits for order 3 page allocation
Currently there seems to be three page frag implementions which all try to allocate order 3 page, if that fails, it then fail back to allocate order 0 page, and each of them all allow order 3 page allocation to fail under certain condition by using specific gfp bits. The gfp bits for order 3 page allocation are different between different implementation, __GFP_NOMEMALLOC is or'd to forbid access to emergency reserves memory for __page_frag_cache_refill(), but it is not or'd in other implementions, __GFP_DIRECT_RECLAIM is masked off to avoid direct reclaim in skb_page_frag_refill(), but it is not masked off in __page_frag_cache_refill(). This patch unifies the gfp bits used between different implementions by or'ing __GFP_NOMEMALLOC and masking off __GFP_DIRECT_RECLAIM for order 3 page allocation to avoid possible pressure for mm. Signed-off-by: Yunsheng Lin Reviewed-by: Alexander Duyck CC: Alexander Duyck --- drivers/vhost/net.c | 2 +- mm/page_alloc.c | 4 ++-- net/core/sock.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index f2ed7167c848..e574e21cc0ca 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -670,7 +670,7 @@ static bool vhost_net_page_frag_refill(struct vhost_net *net, unsigned int sz, /* Avoid direct reclaim but allow kswapd to wake */ pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP | __GFP_NOWARN | - __GFP_NORETRY, + __GFP_NORETRY | __GFP_NOMEMALLOC, SKB_FRAG_PAGE_ORDER); if (likely(pfrag->page)) { pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c0f7e67c4250..636145c29f70 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4685,8 +4685,8 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, gfp_t gfp = gfp_mask; #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) - gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | - __GFP_NOMEMALLOC; + gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP | + __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC; page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, PAGE_FRAG_CACHE_MAX_ORDER); nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE; diff --git a/net/core/sock.c b/net/core/sock.c index 158dbdebce6a..d4bc4269d7d7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2908,7 +2908,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp) /* Avoid direct reclaim but allow kswapd to wake */ pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP | __GFP_NOWARN | - __GFP_NORETRY, + __GFP_NORETRY | __GFP_NOMEMALLOC, SKB_FRAG_PAGE_ORDER); if (likely(pfrag->page)) { pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER; -- 2.33.0
[PATCH net-next v3 4/5] vhost/net: remove vhost_net_page_frag_refill()
The page frag in vhost_net_page_frag_refill() uses the 'struct page_frag' from skb_page_frag_refill(), but it's implementation is similar to page_frag_alloc_align() now. This patch removes vhost_net_page_frag_refill() by using 'struct page_frag_cache' instead of 'struct page_frag', and allocating frag using page_frag_alloc_align(). The added benefit is that not only unifying the page frag implementation a little, but also having about 0.5% performance boost testing by using the vhost_net_test introduced in the last patch. Signed-off-by: Yunsheng Lin Acked-by: Jason Wang --- drivers/vhost/net.c | 91 ++--- 1 file changed, 27 insertions(+), 64 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index e574e21cc0ca..4b2fcb228a0a 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -141,10 +141,8 @@ struct vhost_net { unsigned tx_zcopy_err; /* Flush in progress. Protected by tx vq lock. */ bool tx_flush; - /* Private page frag */ - struct page_frag page_frag; - /* Refcount bias of page frag */ - int refcnt_bias; + /* Private page frag cache */ + struct page_frag_cache pf_cache; }; static unsigned vhost_net_zcopy_mask __read_mostly; @@ -655,41 +653,6 @@ static bool tx_can_batch(struct vhost_virtqueue *vq, size_t total_len) !vhost_vq_avail_empty(vq->dev, vq); } -static bool vhost_net_page_frag_refill(struct vhost_net *net, unsigned int sz, - struct page_frag *pfrag, gfp_t gfp) -{ - if (pfrag->page) { - if (pfrag->offset + sz <= pfrag->size) - return true; - __page_frag_cache_drain(pfrag->page, net->refcnt_bias); - } - - pfrag->offset = 0; - net->refcnt_bias = 0; - if (SKB_FRAG_PAGE_ORDER) { - /* Avoid direct reclaim but allow kswapd to wake */ - pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) | - __GFP_COMP | __GFP_NOWARN | - __GFP_NORETRY | __GFP_NOMEMALLOC, - SKB_FRAG_PAGE_ORDER); - if (likely(pfrag->page)) { - pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER; - goto done; - } - } - pfrag->page = alloc_page(gfp); - if (likely(pfrag->page)) { - pfrag->size = PAGE_SIZE; - goto done; - } - return false; - -done: - net->refcnt_bias = USHRT_MAX; - page_ref_add(pfrag->page, USHRT_MAX - 1); - return true; -} - #define VHOST_NET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, @@ -699,7 +662,6 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev); struct socket *sock = vhost_vq_get_backend(vq); - struct page_frag *alloc_frag = &net->page_frag; struct virtio_net_hdr *gso; struct xdp_buff *xdp = &nvq->xdp[nvq->batched_xdp]; struct tun_xdp_hdr *hdr; @@ -710,6 +672,7 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, int sock_hlen = nvq->sock_hlen; void *buf; int copied; + int ret; if (unlikely(len < nvq->sock_hlen)) return -EFAULT; @@ -719,18 +682,17 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, return -ENOSPC; buflen += SKB_DATA_ALIGN(len + pad); - alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES); - if (unlikely(!vhost_net_page_frag_refill(net, buflen, -alloc_frag, GFP_KERNEL))) + buf = page_frag_alloc_align(&net->pf_cache, buflen, GFP_KERNEL, + SMP_CACHE_BYTES); + if (unlikely(!buf)) return -ENOMEM; - buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; - copied = copy_page_from_iter(alloc_frag->page, -alloc_frag->offset + -offsetof(struct tun_xdp_hdr, gso), -sock_hlen, from); - if (copied != sock_hlen) - return -EFAULT; + copied = copy_from_iter(buf + offsetof(struct tun_xdp_hdr, gso), + sock_hlen, from); + if (copied != sock_hlen) { + ret = -EFAULT; + goto err; + } hdr = buf; gso = &hdr->gso; @@ -743,27 +705,30 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, vhost16_to_cpu(vq, gso->csum_start) + vhost16_to_cpu(vq, gso->csum_offset) + 2); -
[PATCH net-next v3 5/5] tools: virtio: introduce vhost_net_test
introduce vhost_net_test basing on virtio_test to test vhost_net changing in the kernel. Signed-off-by: Yunsheng Lin --- tools/virtio/.gitignore | 1 + tools/virtio/Makefile | 8 +- tools/virtio/vhost_net_test.c | 576 ++ 3 files changed, 582 insertions(+), 3 deletions(-) create mode 100644 tools/virtio/vhost_net_test.c diff --git a/tools/virtio/.gitignore b/tools/virtio/.gitignore index 9934d48d9a55..7e47b281c442 100644 --- a/tools/virtio/.gitignore +++ b/tools/virtio/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only *.d virtio_test +vhost_net_test vringh_test virtio-trace/trace-agent diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile index d128925980e0..e25e99c1c3b7 100644 --- a/tools/virtio/Makefile +++ b/tools/virtio/Makefile @@ -1,8 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 all: test mod -test: virtio_test vringh_test +test: virtio_test vringh_test vhost_net_test virtio_test: virtio_ring.o virtio_test.o vringh_test: vringh_test.o vringh.o virtio_ring.o +vhost_net_test: virtio_ring.o vhost_net_test.o try-run = $(shell set -e; \ if ($(1)) >/dev/null 2>&1; \ @@ -49,6 +50,7 @@ oot-clean: OOT_BUILD+=clean .PHONY: all test mod clean vhost oot oot-clean oot-build clean: - ${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \ - vhost_test/Module.symvers vhost_test/modules.order *.d + ${RM} *.o vringh_test virtio_test vhost_net_test vhost_test/*.o \ + vhost_test/.*.cmd vhost_test/Module.symvers \ + vhost_test/modules.order *.d -include *.d diff --git a/tools/virtio/vhost_net_test.c b/tools/virtio/vhost_net_test.c new file mode 100644 index ..e336792a0d77 --- /dev/null +++ b/tools/virtio/vhost_net_test.c @@ -0,0 +1,576 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define RANDOM_BATCH -1 +#define HDR_LEN12 +#define TEST_BUF_LEN 256 +#define TEST_PTYPE ETH_P_LOOPBACK + +/* Used by implementation of kmalloc() in tools/virtio/linux/kernel.h */ +void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end; + +struct vq_info { + int kick; + int call; + int idx; + long started; + long completed; + struct pollfd fds; + void *ring; + /* copy used for control */ + struct vring vring; + struct virtqueue *vq; +}; + +struct vdev_info { + struct virtio_device vdev; + int control; + struct vq_info vqs[2]; + int nvqs; + void *buf; + size_t buf_size; + char *test_buf; + char *res_buf; + struct vhost_memory *mem; + int sock; + int ifindex; + unsigned char mac[ETHER_ADDR_LEN]; +}; + +static int tun_alloc(struct vdev_info *dev) +{ + struct ifreq ifr; + int len = HDR_LEN; + int fd, e; + + fd = open("/dev/net/tun", O_RDWR); + if (fd < 0) { + perror("Cannot open /dev/net/tun"); + return fd; + } + + memset(&ifr, 0, sizeof(ifr)); + + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; + snprintf(ifr.ifr_name, IFNAMSIZ, "tun_%d", getpid()); + + e = ioctl(fd, TUNSETIFF, &ifr); + if (e < 0) { + perror("ioctl[TUNSETIFF]"); + close(fd); + return e; + } + + e = ioctl(fd, TUNSETVNETHDRSZ, &len); + if (e < 0) { + perror("ioctl[TUNSETVNETHDRSZ]"); + close(fd); + return e; + } + + e = ioctl(fd, SIOCGIFHWADDR, &ifr); + if (e < 0) { + perror("ioctl[SIOCGIFHWADDR]"); + close(fd); + return e; + } + + memcpy(dev->mac, &ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN); + return fd; +} + +static void vdev_create_socket(struct vdev_info *dev) +{ + struct ifreq ifr; + + dev->sock = socket(AF_PACKET, SOCK_RAW, htons(TEST_PTYPE)); + assert(dev->sock != -1); + + snprintf(ifr.ifr_name, IFNAMSIZ, "tun_%d", getpid()); + assert(ioctl(dev->sock, SIOCGIFINDEX, &ifr) >= 0); + + dev->ifindex = ifr.ifr_ifindex; + + /* Set the flags that bring the device up */ + assert(ioctl(dev->sock, SIOCGIFFLAGS, &ifr) >= 0); + ifr.ifr_flags |= (IFF_UP | IFF_RUNNING); + assert(ioctl(dev->sock, SIOCSIFFLAGS, &ifr) >= 0); +} + +static void vdev_send_packet(struct vdev_info *dev) +{ + char *sendbuf = dev->test_buf + HDR_LEN; + struct sockaddr_ll saddrll = {0}; + int sockfd = dev->sock; + int ret; + + saddrll.sll_family = PF_PACKET; + saddrll.sll_ifindex = dev->ifindex; + saddrll.sll_halen = ETH_ALEN; + sadd
[PATCH v12 0/6] Introducing trace buffer mapping by user-space
The tracing ring-buffers can be stored on disk or sent to network without any copy via splice. However the later doesn't allow real time processing of the traces. A solution is to give userspace direct access to the ring-buffer pages via a mapping. An application can now become a consumer of the ring-buffer, in a similar fashion to what trace_pipe offers. Support for this new feature can already be found in libtracefs from version 1.8, when built with EXTRA_CFLAGS=-DFORCE_MMAP_ENABLE. Vincent v11 -> v12: * Fix code sample mmap bug. * Add logging in sample code. * Reset tracer in selftest. * Add a refcount for the snapshot users. * Prevent mapping when there are snapshot users and vice versa. * Refine the meta-page. * Fix types in the meta-page. * Collect Reviewed-by. v10 -> v11: * Add Documentation and code sample. * Add a selftest. * Move all the update to the meta-page into a single rb_update_meta_page(). * rb_update_meta_page() is now called from ring_buffer_map_get_reader() to fix NOBLOCK callers. * kerneldoc for struct trace_meta_page. * Add a patch to zero all the ring-buffer allocations. v9 -> v10: * Refactor rb_update_meta_page() * In-loop declaration for foreach_subbuf_page() * Check for cpu_buffer->mapped overflow v8 -> v9: * Fix the unlock path in ring_buffer_map() * Fix cpu_buffer cast with rb_work_rq->is_cpu_buffer * Rebase on linux-trace/for-next (3cb3091138ca0921c4569bcf7ffa062519639b6a) v7 -> v8: * Drop the subbufs renaming into bpages * Use subbuf as a name when relevant v6 -> v7: * Rebase onto lore.kernel.org/lkml/20231215175502.106587...@goodmis.org/ * Support for subbufs * Rename subbufs into bpages v5 -> v6: * Rebase on next-20230802. * (unsigned long) -> (void *) cast for virt_to_page(). * Add a wait for the GET_READER_PAGE ioctl. * Move writer fields update (overrun/pages_lost/entries/pages_touched) in the irq_work. * Rearrange id in struct buffer_page. * Rearrange the meta-page. * ring_buffer_meta_page -> trace_buffer_meta_page. * Add meta_struct_len into the meta-page. v4 -> v5: * Trivial rebase onto 6.5-rc3 (previously 6.4-rc3) v3 -> v4: * Add to the meta-page: - pages_lost / pages_read (allow to compute how full is the ring-buffer) - read (allow to compute how many entries can be read) - A reader_page struct. * Rename ring_buffer_meta_header -> ring_buffer_meta * Rename ring_buffer_get_reader_page -> ring_buffer_map_get_reader_page * Properly consume events on ring_buffer_map_get_reader_page() with rb_advance_reader(). v2 -> v3: * Remove data page list (for non-consuming read) ** Implies removing order > 0 meta-page * Add a new meta page field ->read * Rename ring_buffer_meta_page_header into ring_buffer_meta_header v1 -> v2: * Hide data_pages from the userspace struct * Fix META_PAGE_MAX_PAGES * Support for order > 0 meta-page * Add missing page->mapping. Vincent Donnefort (6): ring-buffer: Zero ring-buffer sub-buffers ring-buffer: Introducing ring-buffer mapping functions tracing: Add snapshot refcount tracing: Allow user-space mapping of the ring-buffer Documentation: tracing: Add ring-buffer mapping ring-buffer/selftest: Add ring-buffer mapping test Documentation/trace/index.rst | 1 + Documentation/trace/ring-buffer-map.rst | 106 ++ include/linux/ring_buffer.h | 7 + include/uapi/linux/trace_mmap.h | 46 +++ kernel/trace/ring_buffer.c| 338 +- kernel/trace/trace.c | 208 ++- kernel/trace/trace.h | 6 + kernel/trace/trace_events_trigger.c | 57 ++- tools/testing/selftests/ring-buffer/Makefile | 8 + tools/testing/selftests/ring-buffer/config| 1 + .../testing/selftests/ring-buffer/map_test.c | 188 ++ 11 files changed, 925 insertions(+), 41 deletions(-) create mode 100644 Documentation/trace/ring-buffer-map.rst create mode 100644 include/uapi/linux/trace_mmap.h create mode 100644 tools/testing/selftests/ring-buffer/Makefile create mode 100644 tools/testing/selftests/ring-buffer/config create mode 100644 tools/testing/selftests/ring-buffer/map_test.c base-commit: 4f1991a92cfe89096b2d1f5583a2e093bdd55c37 -- 2.43.0.429.g432eaa2c6b-goog
[PATCH v12 1/6] ring-buffer: Zero ring-buffer sub-buffers
In preparation for the ring-buffer memory mapping where each subbuf will be accessible to user-space, zero all the page allocations. Signed-off-by: Vincent Donnefort Reviewed-by: Masami Hiramatsu (Google) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 173d2595ce2d..db73e326fa04 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1466,7 +1466,8 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, list_add(&bpage->list, pages); - page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), mflags, + page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), + mflags | __GFP_ZERO, cpu_buffer->buffer->subbuf_order); if (!page) goto free_pages; @@ -1551,7 +1552,8 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu) cpu_buffer->reader_page = bpage; - page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, cpu_buffer->buffer->subbuf_order); + page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_ZERO, + cpu_buffer->buffer->subbuf_order); if (!page) goto fail_free_reader; bpage->page = page_address(page); @@ -5525,7 +5527,8 @@ ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu) if (bpage->data) goto out; - page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_NORETRY, + page = alloc_pages_node(cpu_to_node(cpu), + GFP_KERNEL | __GFP_NORETRY | __GFP_ZERO, cpu_buffer->buffer->subbuf_order); if (!page) { kfree(bpage); -- 2.43.0.429.g432eaa2c6b-goog
[PATCH v12 2/6] ring-buffer: Introducing ring-buffer mapping functions
In preparation for allowing the user-space to map a ring-buffer, add a set of mapping functions: ring_buffer_{map,unmap}() ring_buffer_map_fault() And controls on the ring-buffer: ring_buffer_map_get_reader() /* swap reader and head */ Mapping the ring-buffer also involves: A unique ID for each subbuf of the ring-buffer, currently they are only identified through their in-kernel VA. A meta-page, where are stored ring-buffer statistics and a description for the current reader The linear mapping exposes the meta-page, and each subbuf of the ring-buffer, ordered following their unique ID, assigned during the first mapping. Once mapped, no subbuf can get in or out of the ring-buffer: the buffer size will remain unmodified and the splice enabling functions will in reality simply memcpy the data instead of swapping subbufs. Signed-off-by: Vincent Donnefort diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index fa802db216f9..0841ba8bab14 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -6,6 +6,8 @@ #include #include +#include + struct trace_buffer; struct ring_buffer_iter; @@ -221,4 +223,9 @@ int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node); #define trace_rb_cpu_prepare NULL #endif +int ring_buffer_map(struct trace_buffer *buffer, int cpu); +int ring_buffer_unmap(struct trace_buffer *buffer, int cpu); +struct page *ring_buffer_map_fault(struct trace_buffer *buffer, int cpu, + unsigned long pgoff); +int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu); #endif /* _LINUX_RING_BUFFER_H */ diff --git a/include/uapi/linux/trace_mmap.h b/include/uapi/linux/trace_mmap.h new file mode 100644 index ..5468afc94be7 --- /dev/null +++ b/include/uapi/linux/trace_mmap.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _TRACE_MMAP_H_ +#define _TRACE_MMAP_H_ + +#include + +/** + * struct trace_buffer_meta - Ring-buffer Meta-page description + * @meta_page_size:Size of this meta-page. + * @meta_struct_len: Size of this structure. + * @subbuf_size: Size of each subbuf, including the header. + * @nr_subbufs:Number of subbfs in the ring-buffer. + * @reader.lost_events:Number of events lost at the time of the reader swap. + * @reader.id: subbuf ID of the current reader. From 0 to @nr_subbufs - 1 + * @reader.read: Number of bytes read on the reader subbuf. + * @entries: Number of entries in the ring-buffer. + * @overrun: Number of entries lost in the ring-buffer. + * @read: Number of entries that have been read. + * @subbufs_touched: Number of subbufs that have been filled. + * @subbufs_lost: Number of subbufs lost to overrun. + * @subbufs_read: Number of subbufs that have been read. + */ +struct trace_buffer_meta { + __u32 meta_page_size; + __u32 meta_struct_len; + + __u32 subbuf_size; + __u32 nr_subbufs; + + struct { + __u64 lost_events; + __u32 id; + __u32 read; + } reader; + + __u64 entries; + __u64 overrun; + __u64 read; + + __u64 subbufs_touched; + __u64 subbufs_lost; +}; + +#endif /* _TRACE_MMAP_H_ */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index db73e326fa04..e7b39f7a1c74 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -338,6 +338,7 @@ struct buffer_page { local_t entries; /* entries on this page */ unsigned longreal_end; /* real end of data */ unsigned order; /* order of the page */ + u32 id;/* ID for external mapping */ struct buffer_data_page *page; /* Actual data page */ }; @@ -484,6 +485,12 @@ struct ring_buffer_per_cpu { u64 read_stamp; /* pages removed since last reset */ unsigned long pages_removed; + + int mapped; + struct mutexmapping_lock; + unsigned long *subbuf_ids;/* ID to addr */ + struct trace_buffer_meta*meta_page; + /* ring buffer pages to update, > 0 to add, < 0 to remove */ longnr_pages_to_update; struct list_headnew_pages; /* new pages to add */ @@ -1542,6 +1549,7 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu) init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters); init_waitqueue_head(&cpu_buffer->irq_work.waiters); init_waitqueue_head(&cpu_buffer->irq_work.full_waiters); + mutex_init(&cpu_buffer->mapping_lock); bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_lin
[PATCH v12 3/6] tracing: Add snapshot refcount
When a ring-buffer is memory mapped by user-space, no trace or ring-buffer swap is possible. This means the snapshot feature is mutually exclusive with the memory mapping. Having a refcount on snapshot users will help to know if a mapping is possible or not. Signed-off-by: Vincent Donnefort diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 46dbe22121e9..ac59321a8d95 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1300,6 +1300,52 @@ static void free_snapshot(struct trace_array *tr) tr->allocated_snapshot = false; } +static int tracing_arm_snapshot_locked(struct trace_array *tr) +{ + int ret; + + lockdep_assert_held(&trace_types_lock); + + if (tr->snapshot == UINT_MAX) + return -EBUSY; + + ret = tracing_alloc_snapshot_instance(tr); + if (ret) + return ret; + + tr->snapshot++; + + return 0; +} + +static void tracing_disarm_snapshot_locked(struct trace_array *tr) +{ + lockdep_assert_held(&trace_types_lock); + + if (WARN_ON(!tr->snapshot)) + return; + + tr->snapshot--; +} + +int tracing_arm_snapshot(struct trace_array *tr) +{ + int ret; + + mutex_lock(&trace_types_lock); + ret = tracing_arm_snapshot_locked(tr); + mutex_unlock(&trace_types_lock); + + return ret; +} + +void tracing_disarm_snapshot(struct trace_array *tr) +{ + mutex_lock(&trace_types_lock); + tracing_disarm_snapshot_locked(tr); + mutex_unlock(&trace_types_lock); +} + /** * tracing_alloc_snapshot - allocate snapshot buffer. * @@ -1373,10 +1419,6 @@ int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, mutex_lock(&trace_types_lock); - ret = tracing_alloc_snapshot_instance(tr); - if (ret) - goto fail_unlock; - if (tr->current_trace->use_max_tr) { ret = -EBUSY; goto fail_unlock; @@ -1395,6 +1437,10 @@ int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, goto fail_unlock; } + ret = tracing_arm_snapshot_locked(tr); + if (ret) + goto fail_unlock; + local_irq_disable(); arch_spin_lock(&tr->max_lock); tr->cond_snapshot = cond_snapshot; @@ -1439,6 +1485,8 @@ int tracing_snapshot_cond_disable(struct trace_array *tr) arch_spin_unlock(&tr->max_lock); local_irq_enable(); + tracing_disarm_snapshot(tr); + return ret; } EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable); @@ -6579,11 +6627,12 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) */ synchronize_rcu(); free_snapshot(tr); + tracing_disarm_snapshot_locked(tr); } - if (t->use_max_tr && !tr->allocated_snapshot) { - ret = tracing_alloc_snapshot_instance(tr); - if (ret < 0) + if (t->use_max_tr) { + ret = tracing_arm_snapshot_locked(tr); + if (ret) goto out; } #else @@ -6592,8 +6641,11 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) if (t->init) { ret = tracer_init(t, tr); - if (ret) + if (ret) { + if (t->use_max_tr) + tracing_disarm_snapshot_locked(tr); goto out; + } } tr->current_trace = t; @@ -7695,10 +7747,11 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, if (tr->allocated_snapshot) ret = resize_buffer_duplicate_size(&tr->max_buffer, &tr->array_buffer, iter->cpu_file); - else - ret = tracing_alloc_snapshot_instance(tr); - if (ret < 0) + + ret = tracing_arm_snapshot_locked(tr); + if (ret) break; + /* Now, we're going to swap */ if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { local_irq_disable(); @@ -7708,6 +7761,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer, (void *)tr, 1); } + tracing_disarm_snapshot_locked(tr); break; default: if (tr->allocated_snapshot) { @@ -8832,8 +8886,13 @@ ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash, ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops; - if (glob[0] == '!') - return unregister_ftrace_function_probe_func(glob+1, tr, ops); + if (glob[0] == '!') { + ret = unregister_ftrace_fu
[PATCH v12 4/6] tracing: Allow user-space mapping of the ring-buffer
Currently, user-space extracts data from the ring-buffer via splice, which is handy for storage or network sharing. However, due to splice limitations, it is imposible to do real-time analysis without a copy. A solution for that problem is to let the user-space map the ring-buffer directly. The mapping is exposed via the per-CPU file trace_pipe_raw. The first element of the mapping is the meta-page. It is followed by each subbuffer constituting the ring-buffer, ordered by their unique page ID: * Meta-page -- include/uapi/linux/trace_mmap.h for a description * Subbuf ID 0 * Subbuf ID 1 ... It is therefore easy to translate a subbuf ID into an offset in the mapping: reader_id = meta->reader->id; reader_offset = meta->meta_page_size + reader_id * meta->subbuf_size; When new data is available, the mapper must call a newly introduced ioctl: TRACE_MMAP_IOCTL_GET_READER. This will update the Meta-page reader ID to point to the next reader containing unread data. Signed-off-by: Vincent Donnefort diff --git a/include/uapi/linux/trace_mmap.h b/include/uapi/linux/trace_mmap.h index 5468afc94be7..09c4ae6d6e71 100644 --- a/include/uapi/linux/trace_mmap.h +++ b/include/uapi/linux/trace_mmap.h @@ -41,4 +41,6 @@ struct trace_buffer_meta { __u64 subbufs_lost; }; +#define TRACE_MMAP_IOCTL_GET_READER_IO('T', 0x1) + #endif /* _TRACE_MMAP_H_ */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ac59321a8d95..eb9e829ecc12 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1175,6 +1175,12 @@ static void tracing_snapshot_instance_cond(struct trace_array *tr, return; } + if (tr->mapped) { + trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n"); + trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n"); + return; + } + local_irq_save(flags); update_max_tr(tr, current, smp_processor_id(), cond_data); local_irq_restore(flags); @@ -1309,6 +1315,9 @@ static int tracing_arm_snapshot_locked(struct trace_array *tr) if (tr->snapshot == UINT_MAX) return -EBUSY; + if (tr->mapped) + return -EBUSY; + ret = tracing_alloc_snapshot_instance(tr); if (ret) return ret; @@ -6520,7 +6529,7 @@ static void tracing_set_nop(struct trace_array *tr) { if (tr->current_trace == &nop_trace) return; - + tr->current_trace->enabled--; if (tr->current_trace->reset) @@ -8637,15 +8646,31 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, return ret; } -/* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */ static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct ftrace_buffer_info *info = file->private_data; struct trace_iterator *iter = &info->iter; + int err; - if (cmd) - return -ENOIOCTLCMD; + if (cmd == TRACE_MMAP_IOCTL_GET_READER) { + if (!(file->f_flags & O_NONBLOCK)) { + err = ring_buffer_wait(iter->array_buffer->buffer, + iter->cpu_file, + iter->tr->buffer_percent); + if (err) + return err; + } + + return ring_buffer_map_get_reader(iter->array_buffer->buffer, + iter->cpu_file); + } else if (cmd) { + return -ENOTTY; + } + /* +* An ioctl call with cmd 0 to the ring buffer file will wake up all +* waiters +*/ mutex_lock(&trace_types_lock); iter->wait_index++; @@ -8658,6 +8683,90 @@ static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned return 0; } +static vm_fault_t tracing_buffers_mmap_fault(struct vm_fault *vmf) +{ + struct ftrace_buffer_info *info = vmf->vma->vm_file->private_data; + struct trace_iterator *iter = &info->iter; + vm_fault_t ret = VM_FAULT_SIGBUS; + struct page *page; + + page = ring_buffer_map_fault(iter->array_buffer->buffer, iter->cpu_file, +vmf->pgoff); + if (!page) + return ret; + + get_page(page); + vmf->page = page; + vmf->page->mapping = vmf->vma->vm_file->f_mapping; + vmf->page->index = vmf->pgoff; + + return 0; +} + +static void tracing_buffers_mmap_close(struct vm_area_struct *vma) +{ + struct ftrace_buffer_info *info = vma->vm_file->private_data; + struct trace_iterator *iter = &info->iter; + + ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file); + +#ifdef CONFIG_TRACER_MAX_TRACE + mutex_lock(&trace_types_lock); + if (!WARN_ON(!iter->tr->mapped)) + ite
[PATCH v12 5/6] Documentation: tracing: Add ring-buffer mapping
It is now possible to mmap() a ring-buffer to stream its content. Add some documentation and a code example. Signed-off-by: Vincent Donnefort diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index 5092d6c13af5..0b300901fd75 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -29,6 +29,7 @@ Linux Tracing Technologies timerlat-tracer intel_th ring-buffer-design + ring-buffer-map stm sys-t coresight/index diff --git a/Documentation/trace/ring-buffer-map.rst b/Documentation/trace/ring-buffer-map.rst new file mode 100644 index ..7d7513180893 --- /dev/null +++ b/Documentation/trace/ring-buffer-map.rst @@ -0,0 +1,106 @@ +.. SPDX-License-Identifier: GPL-2.0 + +== +Tracefs ring-buffer memory mapping +== + +:Author: Vincent Donnefort + +Overview + +Tracefs ring-buffer memory map provides an efficient method to stream data +as no memory copy is necessary. The application mapping the ring-buffer becomes +then a consumer for that ring-buffer, in a similar fashion to trace_pipe. + +Memory mapping setup + +The mapping works with a mmap() of the trace_pipe_raw interface. + +The first system page of the mapping contains ring-buffer statistics and +description. It is referred as the meta-page. One of the most important field of +the meta-page is the reader. It contains the subbuf ID which can be safely read +by the mapper (see ring-buffer-design.rst). + +The meta-page is followed by all the subbuf, ordered by ascendant ID. It is +therefore effortless to know where the reader starts in the mapping: + +.. code-block:: c + +reader_id = meta->reader->id; +reader_offset = meta->meta_page_size + reader_id * meta->subbuf_size; + +When the application is done with the current reader, it can get a new one using +the trace_pipe_raw ioctl() TRACE_MMAP_IOCTL_GET_READER. This ioctl also updates +the meta-page fields. + +Limitations +=== +When a mapping is in place on a Tracefs ring-buffer, it is not possible to +either resize it (either by increasing the entire size of the ring-buffer or +each subbuf). It is also not possible to use snapshot or splice. + +Concurrent readers (either another application mapping that ring-buffer or the +kernel with trace_pipe) are allowed but not recommended. They will compete for +the ring-buffer and the output is unpredictable. + +Example +=== + +.. code-block:: c + +#include +#include +#include +#include + +#include + +#include +#include + +#define TRACE_PIPE_RAW "/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw" + +int main(void) +{ +int page_size = getpagesize(), fd, reader_id; +unsigned long meta_len, data_len; +struct trace_buffer_meta *meta; +void *map, *reader, *data; + +fd = open(TRACE_PIPE_RAW, O_RDONLY | O_NONBLOCK); +if (fd < 0) +exit(EXIT_FAILURE); + +map = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0); +if (map == MAP_FAILED) +exit(EXIT_FAILURE); + +meta = (struct trace_buffer_meta *)map; +meta_len = meta->meta_page_size; + +printf("entries:%llu\n", meta->entries); +printf("overrun:%llu\n", meta->overrun); +printf("read: %llu\n", meta->read); +printf("subbufs_touched:%llu\n", meta->subbufs_touched); +printf("subbufs_lost: %llu\n", meta->subbufs_lost); +printf("nr_subbufs: %u\n", meta->nr_subbufs); + +data_len = meta->subbuf_size * meta->nr_subbufs; +data = mmap(NULL, data_len, PROT_READ, MAP_SHARED, fd, meta_len); +if (data == MAP_FAILED) +exit(EXIT_FAILURE); + +if (ioctl(fd, TRACE_MMAP_IOCTL_GET_READER) < 0) +exit(EXIT_FAILURE); + +reader_id = meta->reader.id; +reader = data + meta->subbuf_size * reader_id; + +printf("Current reader address: %p\n", reader); + +munmap(data, data_len); +munmap(meta, meta_len); +close (fd); + +return 0; +} -- 2.43.0.429.g432eaa2c6b-goog
Re: [PATCH v2 3/5] remoteproc: imx_rproc: Convert to dev_pm_domain_attach|detach_list()
On Tue, 23 Jan 2024 at 02:27, Mathieu Poirier wrote: > > On Mon, Jan 22, 2024 at 01:02:08PM -0700, Mathieu Poirier wrote: > > On Mon, 22 Jan 2024 at 10:51, Iuliana Prodan wrote: > > > > > > On 1/5/2024 6:01 PM, Ulf Hansson wrote: > > > > Let's avoid the boilerplate code to manage the multiple PM domain case, > > > > by > > > > converting into using dev_pm_domain_attach|detach_list(). > > > > > > > > Cc: Mathieu Poirier > > > > Cc: Bjorn Andersson > > > > Cc: Shawn Guo > > > > Cc: Sascha Hauer > > > > Cc: Iuliana Prodan > > > > Cc: Daniel Baluta > > > > Cc: > > > > Signed-off-by: Ulf Hansson > > > > --- > > > > > > > > Changes in v2: > > > > - None. > > > > > > > > Iuliana/Daniel I am ccing you to request help with test/review of this > > > > change. > > > > Note that, you will need patch 1/5 in the series too, to be able to > > > > test this. > > > > > > > > Kind regards > > > > Ulf Hansson > > > > > > Tested-by: Iuliana Prodan > > > Reviewed-by: Iuliana Prodan > > > > > > > Thanks for the leg-work on this. I'll pick this up in rc1 later this week. > > Looking at the other files in this set, Ulf of perhaps Bjorn should take this > set. Yes, all patches in the series depend on the new helper function that is introduced in patch 1. I can certainly take the whole series via my pmdomain tree, if that's fine by everyone. Note that, there is also another series for genpd and qcom drivers that might make it for v6.9. It may be best to keep all these things together to avoid conflicts. Let's see what Greg/Rafael thinks about patch 1 first though. > > for: > > drivers/remoteproc/imx_rproc.c > drivers/remoteproc/imx_dsp_rproc.c > > Reviewed-by: Mathieu Poirier Thanks Mathieu and Iuliana! [...] Kind regards Uffe [1] https://lore.kernel.org/all/20240122-gdsc-hwctrl-v4-0-9061e8a7a...@linaro.org/
Re: [PATCH v4 6/7] Documentation: tracing: add new type '%pd' and '%pD' for kprobe
On Tue, 23 Jan 2024 17:21:38 +0800 Ye Bin wrote: > Similar to printk() '%pd' is for fetch dentry's name from struct dentry's > pointer, and '%pD' is for fetch file's name from struct file's pointer. > > Signed-off-by: Ye Bin > --- > Documentation/trace/kprobetrace.rst | 6 +- > 1 file changed, 5 insertions(+), 1 deletion(-) > > diff --git a/Documentation/trace/kprobetrace.rst > b/Documentation/trace/kprobetrace.rst > index bf9cecb69fc9..a1d12d65a8dc 100644 > --- a/Documentation/trace/kprobetrace.rst > +++ b/Documentation/trace/kprobetrace.rst > @@ -58,7 +58,8 @@ Synopsis of kprobe_events >NAME=FETCHARG : Set NAME as the argument name of FETCHARG. >FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types > (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types > - (x8/x16/x32/x64), "char", "string", "ustring", "symbol", > "symstr" > + (x8/x16/x32/x64), VFS layer common type(%pd/%pD) for print > + file name, "char", "string", "ustring", "symbol", "symstr" Could you remove the "for print file name" here? Since this is not the place where such precise information describes. I think following hunk is enough to explain it. Thank you, >and bitfield are supported. > >(\*1) only for the probe on function entry (offs == 0). Note, this > argument access > @@ -113,6 +114,9 @@ With 'symstr' type, you can filter the event with > wildcard pattern of the > symbols, and you don't need to solve symbol name by yourself. > For $comm, the default type is "string"; any other type is invalid. > > +VFS layer common type(%pd/%pD) is a special type, which fetches dentry's or > +file's name from struct dentry's pointer or struct file's pointer. > + > .. _user_mem_access: > > User Memory Access > -- > 2.31.1 > > -- Masami Hiramatsu (Google)
Re: [PATCH v4 2/7] tracing/probes: add traceprobe_expand_dentry_args() helper
Hi Ye, On Tue, 23 Jan 2024 17:21:34 +0800 Ye Bin wrote: > Add traceprobe_expand_dentry_args() to expand dentry args. this API is > prepare to support "%pd" print format for kprobe. > > Signed-off-by: Ye Bin > --- > kernel/trace/trace_probe.c | 36 > kernel/trace/trace_probe.h | 2 ++ > 2 files changed, 38 insertions(+) > > diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c > index 4dc74d73fc1d..cc8bd7ea5341 100644 > --- a/kernel/trace/trace_probe.c > +++ b/kernel/trace/trace_probe.c > @@ -1565,6 +1565,42 @@ const char **traceprobe_expand_meta_args(int argc, > const char *argv[], > return ERR_PTR(ret); > } > > +int traceprobe_expand_dentry_args(int argc, const char *argv[], char *buf, > + int bufsize) > +{ > + int i, used, ret; > + > + used = 0; > + for (i = 0; i < argc; i++) { > + size_t idx; > + > + if (str_has_suffix(argv[i], ":%pd", &idx)) { Instead of using this, I recommend to use `glob_match("*:%pd", argv[i])` so that you can simply expand the pattern as `glob_match("*:%p[dD]",...)` (glob_match means wildcard match like shell does) Thank you, > + char *tmp = kstrdup(argv[i], GFP_KERNEL); > + char *equal; > + > + if (!tmp) > + return -ENOMEM; > + > + equal = strchr(tmp, '='); > + if (equal) > + *equal = '\0'; > + tmp[idx] = '\0'; > + ret = snprintf(buf + used, bufsize - used, nb> + "%s%s+0x0(+0x%zx(%s)):string", > +equal ? tmp : "", equal ? "=" : "", > +offsetof(struct dentry, d_name.name), > +equal ? equal + 1 : tmp); > + kfree(tmp); > + if (ret >= bufsize - used) > + return -ENOMEM; > + argv[i] = buf + used; > + used += ret + 1; > + } > + } > + > + return 0; > +} > + > void traceprobe_finish_parse(struct traceprobe_parse_context *ctx) > { > clear_btf_context(ctx); > diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h > index 850d9ecb6765..553371a4e0b1 100644 > --- a/kernel/trace/trace_probe.h > +++ b/kernel/trace/trace_probe.h > @@ -402,6 +402,8 @@ extern int traceprobe_parse_probe_arg(struct trace_probe > *tp, int i, > const char **traceprobe_expand_meta_args(int argc, const char *argv[], >int *new_argc, char *buf, int bufsize, >struct traceprobe_parse_context *ctx); > +extern int traceprobe_expand_dentry_args(int argc, const char *argv[], char > *buf, > + int bufsize); > > extern int traceprobe_update_arg(struct probe_arg *arg); > extern void traceprobe_free_probe_arg(struct probe_arg *arg); > -- > 2.31.1 > > -- Masami Hiramatsu (Google)
Re: [PATCH v4 3/7] tracing/probes: support '%pd' type for print struct dentry's name
On Tue, 23 Jan 2024 17:21:35 +0800 Ye Bin wrote: > Similar to '%pd' for printk, use '%pd' for print struct dentry's name. > > Signed-off-by: Ye Bin > --- > kernel/trace/trace_kprobe.c | 6 ++ > kernel/trace/trace_probe.h | 1 + > 2 files changed, 7 insertions(+) > > diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c > index c4c6e0e0068b..00b74530fbad 100644 > --- a/kernel/trace/trace_kprobe.c > +++ b/kernel/trace/trace_kprobe.c > @@ -779,6 +779,7 @@ static int __trace_kprobe_create(int argc, const char > *argv[]) > char buf[MAX_EVENT_NAME_LEN]; > char gbuf[MAX_EVENT_NAME_LEN]; > char abuf[MAX_BTF_ARGS_LEN]; > + char dbuf[MAX_DENTRY_ARGS_LEN]; Hmm, no, I don't like to expand stack anymore. Please allocate it from heap. > struct traceprobe_parse_context ctx = { .flags = TPARG_FL_KERNEL }; > > switch (argv[0][0]) { > @@ -930,6 +931,11 @@ static int __trace_kprobe_create(int argc, const char > *argv[]) > argv = new_argv; > } > > + ret = traceprobe_expand_dentry_args(argc, argv, dbuf, > + MAX_DENTRY_ARGS_LEN); > + if (ret) > + goto out; And calling this here will not cover the trace_fprobe. Could you call this from traceprobe_expand_meta_args() instead of calling it directly from trace_kprobe? Then it can be used from fprobe_event too. Thank you, > + > /* setup a probe */ > tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive, > argc, is_return); > diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h > index 553371a4e0b1..d9c053824975 100644 > --- a/kernel/trace/trace_probe.h > +++ b/kernel/trace/trace_probe.h > @@ -34,6 +34,7 @@ > #define MAX_ARRAY_LEN64 > #define MAX_ARG_NAME_LEN 32 > #define MAX_BTF_ARGS_LEN 128 > +#define MAX_DENTRY_ARGS_LEN 256 Why do you think > #define MAX_STRING_SIZE PATH_MAX > #define MAX_ARG_BUF_LEN (MAX_TRACE_ARGS * MAX_ARG_NAME_LEN) > > -- > 2.31.1 > > -- Masami Hiramatsu (Google)
[PATCH 0/2] tracing/probes: Fix probe event argument parser
These fix two bugs in the trace probe event argument parser, which I found while I'm cleaning up the code. One is just adding an error message to message log buffer (but it is important to check the reason). Another is setting the size of argument entry correctly with argument data updated by BTF. Thank you, --- Masami Hiramatsu (Google) (2): tracing/probes: Fix to show a parse error for bad type for $comm tracing/probes: Fix to set arg size and fmt after setting type from BTF kernel/trace/trace_probe.c | 32 ++-- kernel/trace/trace_probe.h |3 ++- 2 files changed, 20 insertions(+), 15 deletions(-) -- Masami Hiramatsu (Google)
[PATCH 1/2] tracing/probes: Fix to show a parse error for bad type for $comm
From: Masami Hiramatsu (Google) Fix to show a parse error for bad type (non-string) for $comm/$COMM and immediate-string. With this fix, error_log file shows appropriate error message as below. /sys/kernel/tracing # echo 'p vfs_read $comm:u32' >> kprobe_events sh: write error: Invalid argument /sys/kernel/tracing # echo 'p vfs_read \"hoge":u32' >> kprobe_events sh: write error: Invalid argument /sys/kernel/tracing # cat error_log [ 30.144183] trace_kprobe: error: $comm and immediate-string only accepts string type Command: p vfs_read $comm:u32 ^ [ 62.618500] trace_kprobe: error: $comm and immediate-string only accepts string type Command: p vfs_read \"hoge":u32 ^ Fixes: 3dd1f7f24f8c ("tracing: probeevent: Fix to make the type of $comm string") Cc: sta...@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_probe.c |7 +-- kernel/trace/trace_probe.h |3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 4dc74d73fc1d..c6da5923e5b9 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -1159,9 +1159,12 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size, if (!(ctx->flags & TPARG_FL_TEVENT) && (strcmp(arg, "$comm") == 0 || strcmp(arg, "$COMM") == 0 || strncmp(arg, "\\\"", 2) == 0)) { - /* The type of $comm must be "string", and not an array. */ - if (parg->count || (t && strcmp(t, "string"))) + /* The type of $comm must be "string", and not an array type. */ + if (parg->count || (t && strcmp(t, "string"))) { + trace_probe_log_err(ctx->offset + (t ? (t - arg) : 0), + NEED_STRING_TYPE); goto out; + } parg->type = find_fetch_type("string", ctx->flags); } else parg->type = find_fetch_type(t, ctx->flags); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 850d9ecb6765..c1877d018269 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -515,7 +515,8 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(BAD_HYPHEN, "Failed to parse single hyphen. Forgot '>'?"), \ C(NO_BTF_FIELD, "This field is not found."),\ C(BAD_BTF_TID, "Failed to get BTF type info."),\ - C(BAD_TYPE4STR, "This type does not fit for string."), + C(BAD_TYPE4STR, "This type does not fit for string."),\ + C(NEED_STRING_TYPE, "$comm and immediate-string only accepts string type"), #undef C #define C(a, b)TP_ERR_##a
[PATCH 2/2] tracing/probes: Fix to set arg size and fmt after setting type from BTF
From: Masami Hiramatsu (Google) Since the BTF type setting updates probe_arg::type, the type size calculation and setting print-fmt should be done after that. Without this fix, the argument size and print-fmt can be wrong. Fixes: b576e09701c7 ("tracing/probes: Support function parameters if BTF is available") Cc: sta...@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_probe.c | 25 + 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index c6da5923e5b9..34289f9c6707 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -1172,18 +1172,6 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size, trace_probe_log_err(ctx->offset + (t ? (t - arg) : 0), BAD_TYPE); goto out; } - parg->offset = *size; - *size += parg->type->size * (parg->count ?: 1); - - ret = -ENOMEM; - if (parg->count) { - len = strlen(parg->type->fmttype) + 6; - parg->fmt = kmalloc(len, GFP_KERNEL); - if (!parg->fmt) - goto out; - snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype, -parg->count); - } code = tmp = kcalloc(FETCH_INSN_MAX, sizeof(*code), GFP_KERNEL); if (!code) @@ -1207,6 +1195,19 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size, goto fail; } } + parg->offset = *size; + *size += parg->type->size * (parg->count ?: 1); + + if (parg->count) { + len = strlen(parg->type->fmttype) + 6; + parg->fmt = kmalloc(len, GFP_KERNEL); + if (!parg->fmt) { + ret = -ENOMEM; + goto out; + } + snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype, +parg->count); + } ret = -EINVAL; /* Store operation */
Re: [PATCH net-next v2] vsock/test: add '--peer-port' input argument
On Tue, Jan 23, 2024 at 10:27:50AM +0300, Arseniy Krasnov wrote: > Implement port for given CID as input argument instead of using > hardcoded value '1234'. This allows to run different test instances > on a single CID. Port argument is not required parameter and if it is > not set, then default value will be '1234' - thus we preserve previous > behaviour. > > Signed-off-by: Arseniy Krasnov Acked-by: Michael S. Tsirkin > --- > Changelog: > v1 -> v2: > * Reword usage message. > * Add commas after last field in 'opts' declaration. > * 'RFC' -> 'net-next'. > > tools/testing/vsock/util.c| 17 +++- > tools/testing/vsock/util.h| 4 + > tools/testing/vsock/vsock_diag_test.c | 21 +++-- > tools/testing/vsock/vsock_test.c | 102 +- > tools/testing/vsock/vsock_test_zerocopy.c | 12 +-- > tools/testing/vsock/vsock_uring_test.c| 17 +++- > 6 files changed, 115 insertions(+), 58 deletions(-) > > diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c > index ae2b33c21c45..554b290fefdc 100644 > --- a/tools/testing/vsock/util.c > +++ b/tools/testing/vsock/util.c > @@ -33,8 +33,7 @@ void init_signals(void) > signal(SIGPIPE, SIG_IGN); > } > > -/* Parse a CID in string representation */ > -unsigned int parse_cid(const char *str) > +static unsigned int parse_uint(const char *str, const char *err_str) > { > char *endptr = NULL; > unsigned long n; > @@ -42,12 +41,24 @@ unsigned int parse_cid(const char *str) > errno = 0; > n = strtoul(str, &endptr, 10); > if (errno || *endptr != '\0') { > - fprintf(stderr, "malformed CID \"%s\"\n", str); > + fprintf(stderr, "malformed %s \"%s\"\n", err_str, str); > exit(EXIT_FAILURE); > } > return n; > } > > +/* Parse a CID in string representation */ > +unsigned int parse_cid(const char *str) > +{ > + return parse_uint(str, "CID"); > +} > + > +/* Parse a port in string representation */ > +unsigned int parse_port(const char *str) > +{ > + return parse_uint(str, "port"); > +} > + > /* Wait for the remote to close the connection */ > void vsock_wait_remote_close(int fd) > { > diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h > index 03c88d0cb861..e95e62485959 100644 > --- a/tools/testing/vsock/util.h > +++ b/tools/testing/vsock/util.h > @@ -12,10 +12,13 @@ enum test_mode { > TEST_MODE_SERVER > }; > > +#define DEFAULT_PEER_PORT1234 > + > /* Test runner options */ > struct test_opts { > enum test_mode mode; > unsigned int peer_cid; > + unsigned int peer_port; > }; > > /* A test case definition. Test functions must print failures to stderr and > @@ -35,6 +38,7 @@ struct test_case { > > void init_signals(void); > unsigned int parse_cid(const char *str); > +unsigned int parse_port(const char *str); > int vsock_stream_connect(unsigned int cid, unsigned int port); > int vsock_bind_connect(unsigned int cid, unsigned int port, > unsigned int bind_port, int type); > diff --git a/tools/testing/vsock/vsock_diag_test.c > b/tools/testing/vsock/vsock_diag_test.c > index fa927ad16f8a..9d61b1f1c4c3 100644 > --- a/tools/testing/vsock/vsock_diag_test.c > +++ b/tools/testing/vsock/vsock_diag_test.c > @@ -342,7 +342,7 @@ static void test_listen_socket_server(const struct > test_opts *opts) > } addr = { > .svm = { > .svm_family = AF_VSOCK, > - .svm_port = 1234, > + .svm_port = opts->peer_port, > .svm_cid = VMADDR_CID_ANY, > }, > }; > @@ -378,7 +378,7 @@ static void test_connect_client(const struct test_opts > *opts) > LIST_HEAD(sockets); > struct vsock_stat *st; > > - fd = vsock_stream_connect(opts->peer_cid, 1234); > + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); > if (fd < 0) { > perror("connect"); > exit(EXIT_FAILURE); > @@ -403,7 +403,7 @@ static void test_connect_server(const struct test_opts > *opts) > LIST_HEAD(sockets); > int client_fd; > > - client_fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); > + client_fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); > if (client_fd < 0) { > perror("accept"); > exit(EXIT_FAILURE); > @@ -461,6 +461,11 @@ static const struct option longopts[] = { > .has_arg = required_argument, > .val = 'p', > }, > + { > + .name = "peer-port", > + .has_arg = required_argument, > + .val = 'q', > + }, > { > .name = "list", > .has_arg = no_argument, > @@ -481,7 +486,7 @@ static const struct option longopts[] = { > > static void usage(void) > { > - fprintf(stderr, "Usage: vsock_diag_test [--help] > [--control-host=] -
Re: [PATCH v12 2/6] ring-buffer: Introducing ring-buffer mapping functions
On Tue, 23 Jan 2024 11:07:53 + Vincent Donnefort wrote: > index ..5468afc94be7 > --- /dev/null > +++ b/include/uapi/linux/trace_mmap.h > @@ -0,0 +1,44 @@ > +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ > +#ifndef _TRACE_MMAP_H_ > +#define _TRACE_MMAP_H_ > + > +#include > + > +/** > + * struct trace_buffer_meta - Ring-buffer Meta-page description > + * @meta_page_size: Size of this meta-page. > + * @meta_struct_len: Size of this structure. > + * @subbuf_size: Size of each subbuf, including the header. > + * @nr_subbufs: Number of subbfs in the ring-buffer. > + * @reader.lost_events: Number of events lost at the time of the reader > swap. > + * @reader.id: subbuf ID of the current reader. From 0 to > @nr_subbufs - 1 > + * @reader.read: Number of bytes read on the reader subbuf. > + * @entries: Number of entries in the ring-buffer. > + * @overrun: Number of entries lost in the ring-buffer. > + * @read:Number of entries that have been read. > + * @subbufs_touched: Number of subbufs that have been filled. > + * @subbufs_lost:Number of subbufs lost to overrun. > + * @subbufs_read:Number of subbufs that have been read. Do we actually need the above 3 fields? What's the use case for them? I don't want to expose internals in the API unless they are needed. -- Steve
[PATCH 4/4] hwspinlock: omap: Use index to get hwspinlock pointer
For loops with multiple initializers and increments are hard to read and reason about, simplify this by using the looping index to index into the hwspinlock array. Signed-off-by: Andrew Davis --- drivers/hwspinlock/omap_hwspinlock.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/hwspinlock/omap_hwspinlock.c b/drivers/hwspinlock/omap_hwspinlock.c index 1b0a1bea2b24a..a2555c41320d8 100644 --- a/drivers/hwspinlock/omap_hwspinlock.c +++ b/drivers/hwspinlock/omap_hwspinlock.c @@ -75,7 +75,6 @@ static const struct hwspinlock_ops omap_hwspinlock_ops = { static int omap_hwspinlock_probe(struct platform_device *pdev) { struct hwspinlock_device *bank; - struct hwspinlock *hwlock; void __iomem *io_base; int num_locks, i, ret; /* Only a single hwspinlock block device is supported */ @@ -117,8 +116,8 @@ static int omap_hwspinlock_probe(struct platform_device *pdev) if (!bank) return -ENOMEM; - for (i = 0, hwlock = &bank->lock[0]; i < num_locks; i++, hwlock++) - hwlock->priv = io_base + LOCK_BASE_OFFSET + sizeof(u32) * i; + for (i = 0; i < num_locks; i++) + bank->lock[i].priv = io_base + LOCK_BASE_OFFSET + sizeof(u32) * i; ret = devm_hwspin_lock_register(&pdev->dev, bank, &omap_hwspinlock_ops, base_id, num_locks); -- 2.39.2
[PATCH 3/4] hwspinlock: omap: Use devm_hwspin_lock_register() helper
This unregister the HW spinlock on module exit, allowing us to remove the remove callback. Do this here. Signed-off-by: Andrew Davis --- drivers/hwspinlock/omap_hwspinlock.c | 17 + 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/drivers/hwspinlock/omap_hwspinlock.c b/drivers/hwspinlock/omap_hwspinlock.c index 2f18ea6c05e3f..1b0a1bea2b24a 100644 --- a/drivers/hwspinlock/omap_hwspinlock.c +++ b/drivers/hwspinlock/omap_hwspinlock.c @@ -117,12 +117,10 @@ static int omap_hwspinlock_probe(struct platform_device *pdev) if (!bank) return -ENOMEM; - platform_set_drvdata(pdev, bank); - for (i = 0, hwlock = &bank->lock[0]; i < num_locks; i++, hwlock++) hwlock->priv = io_base + LOCK_BASE_OFFSET + sizeof(u32) * i; - ret = hwspin_lock_register(bank, &pdev->dev, &omap_hwspinlock_ops, + ret = devm_hwspin_lock_register(&pdev->dev, bank, &omap_hwspinlock_ops, base_id, num_locks); if (ret) return ret; @@ -133,18 +131,6 @@ static int omap_hwspinlock_probe(struct platform_device *pdev) return 0; } -static void omap_hwspinlock_remove(struct platform_device *pdev) -{ - struct hwspinlock_device *bank = platform_get_drvdata(pdev); - int ret; - - ret = hwspin_lock_unregister(bank); - if (ret) { - dev_err(&pdev->dev, "%s failed: %d\n", __func__, ret); - return; - } -} - static const struct of_device_id omap_hwspinlock_of_match[] = { { .compatible = "ti,omap4-hwspinlock", }, { .compatible = "ti,am64-hwspinlock", }, @@ -155,7 +141,6 @@ MODULE_DEVICE_TABLE(of, omap_hwspinlock_of_match); static struct platform_driver omap_hwspinlock_driver = { .probe = omap_hwspinlock_probe, - .remove_new = omap_hwspinlock_remove, .driver = { .name = "omap_hwspinlock", .of_match_table = omap_hwspinlock_of_match, -- 2.39.2
[PATCH 2/4] hwspinlock: omap: Use devm_pm_runtime_enable() helper
This disables runtime PM on module exit, allowing us to simplify the probe exit path and remove callbacks. Do that here. Signed-off-by: Andrew Davis --- drivers/hwspinlock/omap_hwspinlock.c | 26 -- 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/drivers/hwspinlock/omap_hwspinlock.c b/drivers/hwspinlock/omap_hwspinlock.c index cca55143d24d4..2f18ea6c05e3f 100644 --- a/drivers/hwspinlock/omap_hwspinlock.c +++ b/drivers/hwspinlock/omap_hwspinlock.c @@ -89,10 +89,10 @@ static int omap_hwspinlock_probe(struct platform_device *pdev) * make sure the module is enabled and clocked before reading * the module SYSSTATUS register */ - pm_runtime_enable(&pdev->dev); + devm_pm_runtime_enable(&pdev->dev); ret = pm_runtime_resume_and_get(&pdev->dev); if (ret < 0) - goto runtime_err; + return ret; /* Determine number of locks */ i = readl(io_base + SYSSTATUS_OFFSET); @@ -104,22 +104,18 @@ static int omap_hwspinlock_probe(struct platform_device *pdev) */ ret = pm_runtime_put(&pdev->dev); if (ret < 0) - goto runtime_err; + return ret; /* one of the four lsb's must be set, and nothing else */ - if (hweight_long(i & 0xf) != 1 || i > 8) { - ret = -EINVAL; - goto runtime_err; - } + if (hweight_long(i & 0xf) != 1 || i > 8) + return -EINVAL; num_locks = i * 32; /* actual number of locks in this device */ bank = devm_kzalloc(&pdev->dev, struct_size(bank, lock, num_locks), GFP_KERNEL); - if (!bank) { - ret = -ENOMEM; - goto runtime_err; - } + if (!bank) + return -ENOMEM; platform_set_drvdata(pdev, bank); @@ -129,16 +125,12 @@ static int omap_hwspinlock_probe(struct platform_device *pdev) ret = hwspin_lock_register(bank, &pdev->dev, &omap_hwspinlock_ops, base_id, num_locks); if (ret) - goto runtime_err; + return ret; dev_dbg(&pdev->dev, "Registered %d locks with HwSpinlock core\n", num_locks); return 0; - -runtime_err: - pm_runtime_disable(&pdev->dev); - return ret; } static void omap_hwspinlock_remove(struct platform_device *pdev) @@ -151,8 +143,6 @@ static void omap_hwspinlock_remove(struct platform_device *pdev) dev_err(&pdev->dev, "%s failed: %d\n", __func__, ret); return; } - - pm_runtime_disable(&pdev->dev); } static const struct of_device_id omap_hwspinlock_of_match[] = { -- 2.39.2
[PATCH 1/4] hwspinlock: omap: Remove unneeded check for OF node
We do not use the OF node anymore, nor does it matter how we got to probe, so remove the check for of_node. Signed-off-by: Andrew Davis --- drivers/hwspinlock/omap_hwspinlock.c | 4 1 file changed, 4 deletions(-) diff --git a/drivers/hwspinlock/omap_hwspinlock.c b/drivers/hwspinlock/omap_hwspinlock.c index a9fd9ca45f2a8..cca55143d24d4 100644 --- a/drivers/hwspinlock/omap_hwspinlock.c +++ b/drivers/hwspinlock/omap_hwspinlock.c @@ -74,7 +74,6 @@ static const struct hwspinlock_ops omap_hwspinlock_ops = { static int omap_hwspinlock_probe(struct platform_device *pdev) { - struct device_node *node = pdev->dev.of_node; struct hwspinlock_device *bank; struct hwspinlock *hwlock; void __iomem *io_base; @@ -82,9 +81,6 @@ static int omap_hwspinlock_probe(struct platform_device *pdev) /* Only a single hwspinlock block device is supported */ int base_id = 0; - if (!node) - return -ENODEV; - io_base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(io_base)) return PTR_ERR(io_base); -- 2.39.2
Re: [PATCH v7 04/15] x86/sgx: Implement basic EPC misc cgroup functionality
On Mon, 22 Jan 2024 14:25:53 -0600, Jarkko Sakkinen wrote: On Mon Jan 22, 2024 at 7:20 PM EET, Haitao Huang wrote: From: Kristen Carlson Accardi SGX Enclave Page Cache (EPC) memory allocations are separate from normal RAM allocations, and are managed solely by the SGX subsystem. The existing cgroup memory controller cannot be used to limit or account for SGX EPC memory, which is a desirable feature in some environments. For example, in a Kubernates environment, a user can request certain EPC quota for a pod but the orchestrator can not enforce the quota to limit runtime EPC usage of the pod without an EPC cgroup controller. Utilize the misc controller [admin-guide/cgroup-v2.rst, 5-9. Misc] to limit and track EPC allocations per cgroup. Earlier patches have added the "sgx_epc" resource type in the misc cgroup subsystem. Add basic support in SGX driver as the "sgx_epc" resource provider: - Set "capacity" of EPC by calling misc_cg_set_capacity() - Update EPC usage counter, "current", by calling charge and uncharge APIs for EPC allocation and deallocation, respectively. - Setup sgx_epc resource type specific callbacks, which perform initialization and cleanup during cgroup allocation and deallocation, respectively. With these changes, the misc cgroup controller enables user to set a hard limit for EPC usage in the "misc.max" interface file. It reports current usage in "misc.current", the total EPC memory available in "misc.capacity", and the number of times EPC usage reached the max limit in "misc.events". For now, the EPC cgroup simply blocks additional EPC allocation in sgx_alloc_epc_page() when the limit is reached. Reclaimable pages are still tracked in the global active list, only reclaimed by the global reclaimer when the total free page count is lower than a threshold. Later patches will reorganize the tracking and reclamation code in the global reclaimer and implement per-cgroup tracking and reclaiming. Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Signed-off-by: Kristen Carlson Accardi Co-developed-by: Haitao Huang Signed-off-by: Haitao Huang For consistency sake I'd also add co-developed-by for Kristen. This is at least the format suggested by kernel documentation. --- V7: - Use a static for root cgroup (Kai) - Wrap epc_cg field in sgx_epc_page struct with #ifdef (Kai) - Correct check for charge API return (Kai) - Start initialization in SGX device driver init (Kai) - Remove unneeded BUG_ON (Kai) - Split sgx_get_current_epc_cg() out of sgx_epc_cg_try_charge() (Kai) V6: - Split the original large patch"Limit process EPC usage with misc cgroup controller" and restructure it (Kai) --- arch/x86/Kconfig | 13 + arch/x86/kernel/cpu/sgx/Makefile | 1 + arch/x86/kernel/cpu/sgx/epc_cgroup.c | 79 arch/x86/kernel/cpu/sgx/epc_cgroup.h | 73 + arch/x86/kernel/cpu/sgx/main.c | 52 +- arch/x86/kernel/cpu/sgx/sgx.h| 5 ++ include/linux/misc_cgroup.h | 2 + 7 files changed, 223 insertions(+), 2 deletions(-) create mode 100644 arch/x86/kernel/cpu/sgx/epc_cgroup.c create mode 100644 arch/x86/kernel/cpu/sgx/epc_cgroup.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5edec175b9bf..10c3d1d099b2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1947,6 +1947,19 @@ config X86_SGX If unsure, say N. +config CGROUP_SGX_EPC + bool "Miscellaneous Cgroup Controller for Enclave Page Cache (EPC) for Intel SGX" + depends on X86_SGX && CGROUP_MISC + help + Provides control over the EPC footprint of tasks in a cgroup via + the Miscellaneous cgroup controller. + + EPC is a subset of regular memory that is usable only by SGX + enclaves and is very limited in quantity, e.g. less than 1% + of total DRAM. + + Say N if unsure. + config X86_USER_SHADOW_STACK bool "X86 userspace shadow stack" depends on AS_WRUSS diff --git a/arch/x86/kernel/cpu/sgx/Makefile b/arch/x86/kernel/cpu/sgx/Makefile index 9c1656779b2a..12901a488da7 100644 --- a/arch/x86/kernel/cpu/sgx/Makefile +++ b/arch/x86/kernel/cpu/sgx/Makefile @@ -4,3 +4,4 @@ obj-y += \ ioctl.o \ main.o obj-$(CONFIG_X86_SGX_KVM) += virt.o +obj-$(CONFIG_CGROUP_SGX_EPC) += epc_cgroup.o diff --git a/arch/x86/kernel/cpu/sgx/epc_cgroup.c b/arch/x86/kernel/cpu/sgx/epc_cgroup.c new file mode 100644 index ..938695816a9e --- /dev/null +++ b/arch/x86/kernel/cpu/sgx/epc_cgroup.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2022 Intel Corporation. + +#include +#include +#include "epc_cgroup.h" + +static struct sgx_epc_cgroup epc_cg_root; + +/** + * sgx_epc_cgroup_try_charge() - try to charge cgroup for a single EPC page + * + * @epc_cg:The EPC cgroup to be charged for the page. + * Return: + * * %0 - If successfully charged. + * * -errno -
Re: [PATCH v7 01/15] cgroup/misc: Add per resource callbacks for CSS events
On Mon, 22 Jan 2024 14:14:01 -0600, Jarkko Sakkinen wrote: On Mon Jan 22, 2024 at 7:20 PM EET, Haitao Huang wrote: From: Kristen Carlson Accardi The misc cgroup controller (subsystem) currently does not perform resource type specific action for Cgroups Subsystem State (CSS) events: the 'css_alloc' event when a cgroup is created and the 'css_free' event when a cgroup is destroyed. Define callbacks for those events and allow resource providers to register the callbacks per resource type as needed. This will be utilized later by the EPC misc cgroup support implemented in the SGX driver. Signed-off-by: Kristen Carlson Accardi Co-developed-by: Haitao Huang Signed-off-by: Haitao Huang --- V7: - Make ops one per resource type and store them in array (Michal) - Rename the ops struct to misc_res_ops, and enforce the constraints of required callback functions (Jarkko) - Moved addition of priv field to patch 4 where it was used first. (Jarkko) V6: - Create ops struct for per resource callbacks (Jarkko) - Drop max_write callback (Dave, Michal) - Style fixes (Kai) --- include/linux/misc_cgroup.h | 11 +++ kernel/cgroup/misc.c| 60 +++-- 2 files changed, 68 insertions(+), 3 deletions(-) diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index e799b1f8d05b..0806d4436208 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -27,6 +27,16 @@ struct misc_cg; #include +/** + * struct misc_res_ops: per resource type callback ops. + * @alloc: invoked for resource specific initialization when cgroup is allocated. + * @free: invoked for resource specific cleanup when cgroup is deallocated. + */ +struct misc_res_ops { + int (*alloc)(struct misc_cg *cg); + void (*free)(struct misc_cg *cg); +}; + /** * struct misc_res: Per cgroup per misc type resource * @max: Maximum limit on the resource. @@ -56,6 +66,7 @@ struct misc_cg { u64 misc_cg_res_total_usage(enum misc_res_type type); int misc_cg_set_capacity(enum misc_res_type type, u64 capacity); +int misc_cg_set_ops(enum misc_res_type type, const struct misc_res_ops *ops); int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg, u64 amount); void misc_cg_uncharge(enum misc_res_type type, struct misc_cg *cg, u64 amount); diff --git a/kernel/cgroup/misc.c b/kernel/cgroup/misc.c index 79a3717a5803..b8c32791334c 100644 --- a/kernel/cgroup/misc.c +++ b/kernel/cgroup/misc.c @@ -39,6 +39,9 @@ static struct misc_cg root_cg; */ static u64 misc_res_capacity[MISC_CG_RES_TYPES]; +/* Resource type specific operations */ +static const struct misc_res_ops *misc_res_ops[MISC_CG_RES_TYPES]; + /** * parent_misc() - Get the parent of the passed misc cgroup. * @cgroup: cgroup whose parent needs to be fetched. @@ -105,6 +108,36 @@ int misc_cg_set_capacity(enum misc_res_type type, u64 capacity) } EXPORT_SYMBOL_GPL(misc_cg_set_capacity); +/** + * misc_cg_set_ops() - set resource specific operations. + * @type: Type of the misc res. + * @ops: Operations for the given type. + * + * Context: Any context. + * Return: + * * %0 - Successfully registered the operations. + * * %-EINVAL - If @type is invalid, or the operations missing any required callbacks. + */ +int misc_cg_set_ops(enum misc_res_type type, const struct misc_res_ops *ops) +{ + if (!valid_type(type)) + return -EINVAL; + + if (!ops->alloc) { + pr_err("%s: alloc missing\n", __func__); + return -EINVAL; + } + + if (!ops->free) { + pr_err("%s: free missing\n", __func__); + return -EINVAL; + } + + misc_res_ops[type] = ops; + return 0; +} +EXPORT_SYMBOL_GPL(misc_cg_set_ops); + /** * misc_cg_cancel_charge() - Cancel the charge from the misc cgroup. * @type: Misc res type in misc cg to cancel the charge from. @@ -383,23 +416,37 @@ static struct cftype misc_cg_files[] = { static struct cgroup_subsys_state * misc_cg_alloc(struct cgroup_subsys_state *parent_css) { + struct misc_cg *parent_cg, *cg; enum misc_res_type i; - struct misc_cg *cg; + int ret; if (!parent_css) { - cg = &root_cg; + parent_cg = cg = &root_cg; } else { cg = kzalloc(sizeof(*cg), GFP_KERNEL); if (!cg) return ERR_PTR(-ENOMEM); + parent_cg = css_misc(parent_css); } for (i = 0; i < MISC_CG_RES_TYPES; i++) { WRITE_ONCE(cg->res[i].max, MAX_NUM); atomic64_set(&cg->res[i].usage, 0); + if (misc_res_ops[i]) { + ret = misc_res_ops[i]->alloc(cg); + if (ret) + goto alloc_err; So I'd consider pattern like this to avoid repetition: if (ret) { __misc_cg_free(cg);
Re: [PATCH v12 2/6] ring-buffer: Introducing ring-buffer mapping functions
On Tue, Jan 23, 2024 at 10:51:49AM -0500, Steven Rostedt wrote: > On Tue, 23 Jan 2024 11:07:53 + > Vincent Donnefort wrote: > > > index ..5468afc94be7 > > --- /dev/null > > +++ b/include/uapi/linux/trace_mmap.h > > @@ -0,0 +1,44 @@ > > +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ > > +#ifndef _TRACE_MMAP_H_ > > +#define _TRACE_MMAP_H_ > > + > > +#include > > + > > +/** > > + * struct trace_buffer_meta - Ring-buffer Meta-page description > > + * @meta_page_size:Size of this meta-page. > > + * @meta_struct_len: Size of this structure. > > + * @subbuf_size: Size of each subbuf, including the header. > > + * @nr_subbufs:Number of subbfs in the ring-buffer. > > + * @reader.lost_events:Number of events lost at the time of the reader > > swap. > > + * @reader.id: subbuf ID of the current reader. From 0 to > > @nr_subbufs - 1 > > + * @reader.read: Number of bytes read on the reader subbuf. > > + * @entries: Number of entries in the ring-buffer. > > + * @overrun: Number of entries lost in the ring-buffer. > > + * @read: Number of entries that have been read. > > > > + * @subbufs_touched: Number of subbufs that have been filled. > > + * @subbufs_lost: Number of subbufs lost to overrun. > > + * @subbufs_read: Number of subbufs that have been read. > > Do we actually need the above 3 fields? > > What's the use case for them? I don't want to expose internals in the API > unless they are needed. subbufs_read is gone, I just forgot to remove it here :-\. The two other ones are used for tracing with the hypervisor. That's why I preemptively added them. I can remove them and add just append this struct later ... or just overload this struct with another one, only shared between the kernel and the hypervisor? > > -- Steve
Re: [PATCH v12 2/6] ring-buffer: Introducing ring-buffer mapping functions
On Tue, 23 Jan 2024 17:48:17 + Vincent Donnefort wrote: > > > + * @subbufs_touched: Number of subbufs that have been filled. > > > + * @subbufs_lost:Number of subbufs lost to overrun. > > > + * @subbufs_read:Number of subbufs that have been read. > > > > Do we actually need the above 3 fields? > > > > What's the use case for them? I don't want to expose internals in the API > > unless they are needed. > > subbufs_read is gone, I just forgot to remove it here :-\. > > The two other ones are used for tracing with the hypervisor. That's why I > preemptively added them. > > I can remove them and add just append this struct later ... or just overload > this struct with another one, only shared between the kernel and the > hypervisor? Yes, let's have them be different for now. Or we could just make them "reserved" and say that they are not used for now. I also think we should add a flags value too, so that when they are no longer reserved, a flag can be set to say that they are active. struct trace_buffer_meta { __u32 meta_page_size; __u32 meta_struct_len; __u32 subbuf_size; __u32 nr_subbufs; struct { __u64 lost_events; __u32 id; __u32 read; } reader; __u64 flags; __u64 entries; __u64 overrun; __u64 read; __u64 Reserved1; __u64 Reserved2; }; And that way the hypervisor would still have those available, and updated, but they are not filled for user space. The hypervisor could even set the flag field saying it's using them. That is, the first flag is: enum { TRACE_BUFFER_META_FL_SUBUF_INFO = (1 << 0), }; And if that's set, those fields are available. But we just don't set them now ;-) -- Steve
[PATCH 5/9] remoteproc: qcom_q6v5_pas: Use devm_rproc_alloc() helper
Use the device lifecycle managed allocation function. This helps prevent mistakes like freeing out of order in cleanup functions and forgetting to free on error paths. Signed-off-by: Andrew Davis --- drivers/remoteproc/qcom_q6v5_pas.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c index a9dd58608052c..60b0cd30c0592 100644 --- a/drivers/remoteproc/qcom_q6v5_pas.c +++ b/drivers/remoteproc/qcom_q6v5_pas.c @@ -678,7 +678,7 @@ static int adsp_probe(struct platform_device *pdev) if (desc->minidump_id) ops = &adsp_minidump_ops; - rproc = rproc_alloc(&pdev->dev, pdev->name, ops, fw_name, sizeof(*adsp)); + rproc = devm_rproc_alloc(&pdev->dev, pdev->name, ops, fw_name, sizeof(*adsp)); if (!rproc) { dev_err(&pdev->dev, "unable to allocate remoteproc\n"); @@ -754,7 +754,6 @@ static int adsp_probe(struct platform_device *pdev) adsp_pds_detach(adsp, adsp->proxy_pds, adsp->proxy_pd_count); free_rproc: device_init_wakeup(adsp->dev, false); - rproc_free(rproc); return ret; } @@ -773,7 +772,6 @@ static void adsp_remove(struct platform_device *pdev) qcom_remove_ssr_subdev(adsp->rproc, &adsp->ssr_subdev); adsp_pds_detach(adsp, adsp->proxy_pds, adsp->proxy_pd_count); device_init_wakeup(adsp->dev, false); - rproc_free(adsp->rproc); } static const struct adsp_data adsp_resource_init = { -- 2.39.2
[PATCH 1/9] remoteproc: imx_dsp_rproc: Use devm_rproc_alloc() helper
Use the device lifecycle managed allocation function. This helps prevent mistakes like freeing out of order in cleanup functions and forgetting to free on error paths. Signed-off-by: Andrew Davis --- drivers/remoteproc/imx_dsp_rproc.c | 11 --- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/remoteproc/imx_dsp_rproc.c b/drivers/remoteproc/imx_dsp_rproc.c index a1c62d15f16c6..56a799cb8b363 100644 --- a/drivers/remoteproc/imx_dsp_rproc.c +++ b/drivers/remoteproc/imx_dsp_rproc.c @@ -1104,8 +1104,8 @@ static int imx_dsp_rproc_probe(struct platform_device *pdev) return ret; } - rproc = rproc_alloc(dev, "imx-dsp-rproc", &imx_dsp_rproc_ops, fw_name, - sizeof(*priv)); + rproc = devm_rproc_alloc(dev, "imx-dsp-rproc", &imx_dsp_rproc_ops, +fw_name, sizeof(*priv)); if (!rproc) return -ENOMEM; @@ -1125,14 +1125,14 @@ static int imx_dsp_rproc_probe(struct platform_device *pdev) ret = imx_dsp_rproc_detect_mode(priv); if (ret) { dev_err(dev, "failed on imx_dsp_rproc_detect_mode\n"); - goto err_put_rproc; + return ret; } /* There are multiple power domains required by DSP on some platform */ ret = imx_dsp_attach_pm_domains(priv); if (ret) { dev_err(dev, "failed on imx_dsp_attach_pm_domains\n"); - goto err_put_rproc; + return ret; } /* Get clocks */ ret = imx_dsp_rproc_clk_get(priv); @@ -1155,8 +1155,6 @@ static int imx_dsp_rproc_probe(struct platform_device *pdev) err_detach_domains: imx_dsp_detach_pm_domains(priv); -err_put_rproc: - rproc_free(rproc); return ret; } @@ -1169,7 +1167,6 @@ static void imx_dsp_rproc_remove(struct platform_device *pdev) pm_runtime_disable(&pdev->dev); rproc_del(rproc); imx_dsp_detach_pm_domains(priv); - rproc_free(rproc); } /* pm runtime functions */ -- 2.39.2
[PATCH 6/9] remoteproc: qcom_q6v5_wcss: Use devm_rproc_alloc() helper
Use the device lifecycle managed allocation function. This helps prevent mistakes like freeing out of order in cleanup functions and forgetting to free on error paths. Signed-off-by: Andrew Davis --- drivers/remoteproc/qcom_q6v5_wcss.c | 24 +--- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/drivers/remoteproc/qcom_q6v5_wcss.c b/drivers/remoteproc/qcom_q6v5_wcss.c index cff1fa07d1def..94f68c919ee62 100644 --- a/drivers/remoteproc/qcom_q6v5_wcss.c +++ b/drivers/remoteproc/qcom_q6v5_wcss.c @@ -1011,8 +1011,8 @@ static int q6v5_wcss_probe(struct platform_device *pdev) if (!desc) return -EINVAL; - rproc = rproc_alloc(&pdev->dev, pdev->name, desc->ops, - desc->firmware_name, sizeof(*wcss)); + rproc = devm_rproc_alloc(&pdev->dev, pdev->name, desc->ops, +desc->firmware_name, sizeof(*wcss)); if (!rproc) { dev_err(&pdev->dev, "failed to allocate rproc\n"); return -ENOMEM; @@ -1027,29 +1027,29 @@ static int q6v5_wcss_probe(struct platform_device *pdev) ret = q6v5_wcss_init_mmio(wcss, pdev); if (ret) - goto free_rproc; + return ret; ret = q6v5_alloc_memory_region(wcss); if (ret) - goto free_rproc; + return ret; if (wcss->version == WCSS_QCS404) { ret = q6v5_wcss_init_clock(wcss); if (ret) - goto free_rproc; + return ret; ret = q6v5_wcss_init_regulator(wcss); if (ret) - goto free_rproc; + return ret; } ret = q6v5_wcss_init_reset(wcss, desc); if (ret) - goto free_rproc; + return ret; ret = qcom_q6v5_init(&wcss->q6v5, pdev, rproc, desc->crash_reason_smem, NULL, NULL); if (ret) - goto free_rproc; + return ret; qcom_add_glink_subdev(rproc, &wcss->glink_subdev, "q6wcss"); qcom_add_ssr_subdev(rproc, &wcss->ssr_subdev, "q6wcss"); @@ -1061,16 +1061,11 @@ static int q6v5_wcss_probe(struct platform_device *pdev) ret = rproc_add(rproc); if (ret) - goto free_rproc; + return ret; platform_set_drvdata(pdev, rproc); return 0; - -free_rproc: - rproc_free(rproc); - - return ret; } static void q6v5_wcss_remove(struct platform_device *pdev) @@ -1080,7 +1075,6 @@ static void q6v5_wcss_remove(struct platform_device *pdev) qcom_q6v5_deinit(&wcss->q6v5); rproc_del(rproc); - rproc_free(rproc); } static const struct wcss_data wcss_ipq8074_res_init = { -- 2.39.2
[PATCH 3/9] remoteproc: qcom_q6v5_adsp: Use devm_rproc_alloc() helper
Use the device lifecycle managed allocation function. This helps prevent mistakes like freeing out of order in cleanup functions and forgetting to free on error paths. Signed-off-by: Andrew Davis --- drivers/remoteproc/qcom_q6v5_adsp.c | 14 +- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/remoteproc/qcom_q6v5_adsp.c b/drivers/remoteproc/qcom_q6v5_adsp.c index 6c67514cc4931..34ac996a93b20 100644 --- a/drivers/remoteproc/qcom_q6v5_adsp.c +++ b/drivers/remoteproc/qcom_q6v5_adsp.c @@ -683,8 +683,8 @@ static int adsp_probe(struct platform_device *pdev) return ret; } - rproc = rproc_alloc(&pdev->dev, pdev->name, &adsp_ops, - firmware_name, sizeof(*adsp)); + rproc = devm_rproc_alloc(&pdev->dev, pdev->name, &adsp_ops, +firmware_name, sizeof(*adsp)); if (!rproc) { dev_err(&pdev->dev, "unable to allocate remoteproc\n"); return -ENOMEM; @@ -709,17 +709,17 @@ static int adsp_probe(struct platform_device *pdev) ret = adsp_alloc_memory_region(adsp); if (ret) - goto free_rproc; + return ret; ret = adsp_init_clock(adsp, desc->clk_ids); if (ret) - goto free_rproc; + return ret; ret = qcom_rproc_pds_attach(adsp->dev, adsp, desc->proxy_pd_names); if (ret < 0) { dev_err(&pdev->dev, "Failed to attach proxy power domains\n"); - goto free_rproc; + return ret; } adsp->proxy_pd_count = ret; @@ -755,9 +755,6 @@ static int adsp_probe(struct platform_device *pdev) disable_pm: qcom_rproc_pds_detach(adsp, adsp->proxy_pds, adsp->proxy_pd_count); -free_rproc: - rproc_free(rproc); - return ret; } @@ -772,7 +769,6 @@ static void adsp_remove(struct platform_device *pdev) qcom_remove_sysmon_subdev(adsp->sysmon); qcom_remove_ssr_subdev(adsp->rproc, &adsp->ssr_subdev); qcom_rproc_pds_detach(adsp, adsp->proxy_pds, adsp->proxy_pd_count); - rproc_free(adsp->rproc); } static const struct adsp_pil_data adsp_resource_init = { -- 2.39.2
[PATCH 7/9] remoteproc: qcom_wcnss: Use devm_rproc_alloc() helper
Use the device lifecycle managed allocation function. This helps prevent mistakes like freeing out of order in cleanup functions and forgetting to free on error paths. Signed-off-by: Andrew Davis --- drivers/remoteproc/qcom_wcnss.c | 17 ++--- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c index 90de22c81da97..a7bb9da27029d 100644 --- a/drivers/remoteproc/qcom_wcnss.c +++ b/drivers/remoteproc/qcom_wcnss.c @@ -555,8 +555,8 @@ static int wcnss_probe(struct platform_device *pdev) if (ret < 0 && ret != -EINVAL) return ret; - rproc = rproc_alloc(&pdev->dev, pdev->name, &wcnss_ops, - fw_name, sizeof(*wcnss)); + rproc = devm_rproc_alloc(&pdev->dev, pdev->name, &wcnss_ops, +fw_name, sizeof(*wcnss)); if (!rproc) { dev_err(&pdev->dev, "unable to allocate remoteproc\n"); return -ENOMEM; @@ -574,14 +574,12 @@ static int wcnss_probe(struct platform_device *pdev) mutex_init(&wcnss->iris_lock); mmio = devm_platform_ioremap_resource_byname(pdev, "pmu"); - if (IS_ERR(mmio)) { - ret = PTR_ERR(mmio); - goto free_rproc; - } + if (IS_ERR(mmio)) + return PTR_ERR(mmio); ret = wcnss_alloc_memory_region(wcnss); if (ret) - goto free_rproc; + return ret; wcnss->pmu_cfg = mmio + data->pmu_offset; wcnss->spare_out = mmio + data->spare_offset; @@ -592,7 +590,7 @@ static int wcnss_probe(struct platform_device *pdev) */ ret = wcnss_init_pds(wcnss, data->pd_names); if (ret && (ret != -ENODATA || !data->num_pd_vregs)) - goto free_rproc; + return ret; ret = wcnss_init_regulators(wcnss, data->vregs, data->num_vregs, data->num_pd_vregs); @@ -656,8 +654,6 @@ static int wcnss_probe(struct platform_device *pdev) qcom_iris_remove(wcnss->iris); detach_pds: wcnss_release_pds(wcnss); -free_rproc: - rproc_free(rproc); return ret; } @@ -673,7 +669,6 @@ static void wcnss_remove(struct platform_device *pdev) qcom_remove_sysmon_subdev(wcnss->sysmon); qcom_remove_smd_subdev(wcnss->rproc, &wcnss->smd_subdev); wcnss_release_pds(wcnss); - rproc_free(wcnss->rproc); } static const struct of_device_id wcnss_of_match[] = { -- 2.39.2
[PATCH 8/9] remoteproc: st: Use devm_rproc_alloc() helper
Use the device lifecycle managed allocation function. This helps prevent mistakes like freeing out of order in cleanup functions and forgetting to free on error paths. Signed-off-by: Andrew Davis --- drivers/remoteproc/st_remoteproc.c | 15 +-- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/remoteproc/st_remoteproc.c b/drivers/remoteproc/st_remoteproc.c index cb163766c56d5..1340be9d01101 100644 --- a/drivers/remoteproc/st_remoteproc.c +++ b/drivers/remoteproc/st_remoteproc.c @@ -347,23 +347,21 @@ static int st_rproc_probe(struct platform_device *pdev) int enabled; int ret, i; - rproc = rproc_alloc(dev, np->name, &st_rproc_ops, NULL, sizeof(*ddata)); + rproc = devm_rproc_alloc(dev, np->name, &st_rproc_ops, NULL, sizeof(*ddata)); if (!rproc) return -ENOMEM; rproc->has_iommu = false; ddata = rproc->priv; ddata->config = (struct st_rproc_config *)device_get_match_data(dev); - if (!ddata->config) { - ret = -ENODEV; - goto free_rproc; - } + if (!ddata->config) + return -ENODEV; platform_set_drvdata(pdev, rproc); ret = st_rproc_parse_dt(pdev); if (ret) - goto free_rproc; + return ret; enabled = st_rproc_state(pdev); if (enabled < 0) { @@ -439,8 +437,7 @@ static int st_rproc_probe(struct platform_device *pdev) mbox_free_channel(ddata->mbox_chan[i]); free_clk: clk_unprepare(ddata->clk); -free_rproc: - rproc_free(rproc); + return ret; } @@ -456,8 +453,6 @@ static void st_rproc_remove(struct platform_device *pdev) for (i = 0; i < ST_RPROC_MAX_VRING * MBOX_MAX; i++) mbox_free_channel(ddata->mbox_chan[i]); - - rproc_free(rproc); } static struct platform_driver st_rproc_driver = { -- 2.39.2
[PATCH 4/9] remoteproc: qcom_q6v5_mss: Use devm_rproc_alloc() helper
Use the device lifecycle managed allocation function. This helps prevent mistakes like freeing out of order in cleanup functions and forgetting to free on error paths. Signed-off-by: Andrew Davis --- drivers/remoteproc/qcom_q6v5_mss.c | 28 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/remoteproc/qcom_q6v5_mss.c b/drivers/remoteproc/qcom_q6v5_mss.c index 394b2c1cb5e21..1779fc890e102 100644 --- a/drivers/remoteproc/qcom_q6v5_mss.c +++ b/drivers/remoteproc/qcom_q6v5_mss.c @@ -1990,8 +1990,8 @@ static int q6v5_probe(struct platform_device *pdev) return ret; } - rproc = rproc_alloc(&pdev->dev, pdev->name, &q6v5_ops, - mba_image, sizeof(*qproc)); + rproc = devm_rproc_alloc(&pdev->dev, pdev->name, &q6v5_ops, +mba_image, sizeof(*qproc)); if (!rproc) { dev_err(&pdev->dev, "failed to allocate rproc\n"); return -ENOMEM; @@ -2008,7 +2008,7 @@ static int q6v5_probe(struct platform_device *pdev) 1, &qproc->hexagon_mdt_image); if (ret < 0 && ret != -EINVAL) { dev_err(&pdev->dev, "unable to read mpss firmware-name\n"); - goto free_rproc; + return ret; } platform_set_drvdata(pdev, qproc); @@ -2019,17 +2019,17 @@ static int q6v5_probe(struct platform_device *pdev) qproc->has_spare_reg = desc->has_spare_reg; ret = q6v5_init_mem(qproc, pdev); if (ret) - goto free_rproc; + return ret; ret = q6v5_alloc_memory_region(qproc); if (ret) - goto free_rproc; + return ret; ret = q6v5_init_clocks(&pdev->dev, qproc->proxy_clks, desc->proxy_clk_names); if (ret < 0) { dev_err(&pdev->dev, "Failed to get proxy clocks.\n"); - goto free_rproc; + return ret; } qproc->proxy_clk_count = ret; @@ -2037,7 +2037,7 @@ static int q6v5_probe(struct platform_device *pdev) desc->reset_clk_names); if (ret < 0) { dev_err(&pdev->dev, "Failed to get reset clocks.\n"); - goto free_rproc; + return ret; } qproc->reset_clk_count = ret; @@ -2045,7 +2045,7 @@ static int q6v5_probe(struct platform_device *pdev) desc->active_clk_names); if (ret < 0) { dev_err(&pdev->dev, "Failed to get active clocks.\n"); - goto free_rproc; + return ret; } qproc->active_clk_count = ret; @@ -2053,7 +2053,7 @@ static int q6v5_probe(struct platform_device *pdev) desc->proxy_supply); if (ret < 0) { dev_err(&pdev->dev, "Failed to get proxy regulators.\n"); - goto free_rproc; + return ret; } qproc->proxy_reg_count = ret; @@ -2061,7 +2061,7 @@ static int q6v5_probe(struct platform_device *pdev) desc->active_supply); if (ret < 0) { dev_err(&pdev->dev, "Failed to get active regulators.\n"); - goto free_rproc; + return ret; } qproc->active_reg_count = ret; @@ -2074,12 +2074,12 @@ static int q6v5_probe(struct platform_device *pdev) desc->fallback_proxy_supply); if (ret < 0) { dev_err(&pdev->dev, "Failed to get fallback proxy regulators.\n"); - goto free_rproc; + return ret; } qproc->fallback_proxy_reg_count = ret; } else if (ret < 0) { dev_err(&pdev->dev, "Failed to init power domains\n"); - goto free_rproc; + return ret; } else { qproc->proxy_pd_count = ret; } @@ -2127,8 +2127,6 @@ static int q6v5_probe(struct platform_device *pdev) qcom_remove_glink_subdev(rproc, &qproc->glink_subdev); detach_proxy_pds: q6v5_pds_detach(qproc, qproc->proxy_pds, qproc->proxy_pd_count); -free_rproc: - rproc_free(rproc); return ret; } @@ -2149,8 +2147,6 @@ static void q6v5_remove(struct platform_device *pdev) qcom_remove_glink_subdev(rproc, &qproc->glink_subdev); q6v5_pds_detach(qproc, qproc->proxy_pds, qproc->proxy_pd_count); - - rproc_free(rproc); } static const struct rproc_hexagon_res sc7180_mss = { -- 2.39.2
[PATCH 2/9] remoteproc: imx_rproc: Use devm_rproc_alloc() helper
Use the device lifecycle managed allocation function. This helps prevent mistakes like freeing out of order in cleanup functions and forgetting to free on error paths. Signed-off-by: Andrew Davis --- drivers/remoteproc/imx_rproc.c | 16 +--- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c index 8bb293b9f327c..55ecce3ab5f75 100644 --- a/drivers/remoteproc/imx_rproc.c +++ b/drivers/remoteproc/imx_rproc.c @@ -1104,16 +1104,14 @@ static int imx_rproc_probe(struct platform_device *pdev) int ret; /* set some other name then imx */ - rproc = rproc_alloc(dev, "imx-rproc", &imx_rproc_ops, - NULL, sizeof(*priv)); + rproc = devm_rproc_alloc(dev, "imx-rproc", &imx_rproc_ops, +NULL, sizeof(*priv)); if (!rproc) return -ENOMEM; dcfg = of_device_get_match_data(dev); - if (!dcfg) { - ret = -EINVAL; - goto err_put_rproc; - } + if (!dcfg) + return -EINVAL; priv = rproc->priv; priv->rproc = rproc; @@ -1124,8 +1122,7 @@ static int imx_rproc_probe(struct platform_device *pdev) priv->workqueue = create_workqueue(dev_name(dev)); if (!priv->workqueue) { dev_err(dev, "cannot create workqueue\n"); - ret = -ENOMEM; - goto err_put_rproc; + return -ENOMEM; } ret = imx_rproc_xtr_mbox_init(rproc); @@ -1167,8 +1164,6 @@ static int imx_rproc_probe(struct platform_device *pdev) imx_rproc_free_mbox(rproc); err_put_wkq: destroy_workqueue(priv->workqueue); -err_put_rproc: - rproc_free(rproc); return ret; } @@ -1183,7 +1178,6 @@ static void imx_rproc_remove(struct platform_device *pdev) imx_rproc_put_scu(rproc); imx_rproc_free_mbox(rproc); destroy_workqueue(priv->workqueue); - rproc_free(rproc); } static const struct of_device_id imx_rproc_of_match[] = { -- 2.39.2
[PATCH 9/9] remoteproc: stm32: Use devm_rproc_alloc() helper
Use the device lifecycle managed allocation function. This helps prevent mistakes like freeing out of order in cleanup functions and forgetting to free on error paths. Signed-off-by: Andrew Davis --- drivers/remoteproc/stm32_rproc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/remoteproc/stm32_rproc.c b/drivers/remoteproc/stm32_rproc.c index 4f469f0bcf8b2..fed0866de1819 100644 --- a/drivers/remoteproc/stm32_rproc.c +++ b/drivers/remoteproc/stm32_rproc.c @@ -843,7 +843,7 @@ static int stm32_rproc_probe(struct platform_device *pdev) if (ret) return ret; - rproc = rproc_alloc(dev, np->name, &st_rproc_ops, NULL, sizeof(*ddata)); + rproc = devm_rproc_alloc(dev, np->name, &st_rproc_ops, NULL, sizeof(*ddata)); if (!rproc) return -ENOMEM; @@ -897,7 +897,6 @@ static int stm32_rproc_probe(struct platform_device *pdev) dev_pm_clear_wake_irq(dev); device_init_wakeup(dev, false); } - rproc_free(rproc); return ret; } @@ -918,7 +917,6 @@ static void stm32_rproc_remove(struct platform_device *pdev) dev_pm_clear_wake_irq(dev); device_init_wakeup(dev, false); } - rproc_free(rproc); } static int stm32_rproc_suspend(struct device *dev) -- 2.39.2
[PATCH 1/8] remoteproc: k3-dsp: Use devm_rproc_alloc() helper
Use the device lifecycle managed allocation function. This helps prevent mistakes like freeing out of order in cleanup functions and forgetting to free on error paths. Signed-off-by: Andrew Davis --- drivers/remoteproc/ti_k3_dsp_remoteproc.c | 16 +--- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/remoteproc/ti_k3_dsp_remoteproc.c b/drivers/remoteproc/ti_k3_dsp_remoteproc.c index ab882e3b7130b..93fbc89307d6a 100644 --- a/drivers/remoteproc/ti_k3_dsp_remoteproc.c +++ b/drivers/remoteproc/ti_k3_dsp_remoteproc.c @@ -690,8 +690,8 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev) if (ret) return dev_err_probe(dev, ret, "failed to parse firmware-name property\n"); - rproc = rproc_alloc(dev, dev_name(dev), &k3_dsp_rproc_ops, fw_name, - sizeof(*kproc)); + rproc = devm_rproc_alloc(dev, dev_name(dev), &k3_dsp_rproc_ops, +fw_name, sizeof(*kproc)); if (!rproc) return -ENOMEM; @@ -707,12 +707,9 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev) kproc->data = data; kproc->ti_sci = ti_sci_get_by_phandle(np, "ti,sci"); - if (IS_ERR(kproc->ti_sci)) { - ret = dev_err_probe(dev, PTR_ERR(kproc->ti_sci), - "failed to get ti-sci handle\n"); - kproc->ti_sci = NULL; - goto free_rproc; - } + if (IS_ERR(kproc->ti_sci)) + return dev_err_probe(dev, PTR_ERR(kproc->ti_sci), +"failed to get ti-sci handle\n"); ret = of_property_read_u32(np, "ti,sci-dev-id", &kproc->ti_sci_id); if (ret) { @@ -810,8 +807,6 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev) ret1 = ti_sci_put_handle(kproc->ti_sci); if (ret1) dev_err(dev, "failed to put ti_sci handle (%pe)\n", ERR_PTR(ret1)); -free_rproc: - rproc_free(rproc); return ret; } @@ -844,7 +839,6 @@ static void k3_dsp_rproc_remove(struct platform_device *pdev) dev_err(dev, "failed to put ti_sci handle (%pe)\n", ERR_PTR(ret)); k3_dsp_reserved_mem_exit(kproc); - rproc_free(kproc->rproc); } static const struct k3_dsp_mem_data c66_mems[] = { -- 2.39.2
[PATCH 2/8] remoteproc: k3-dsp: Add devm action to release reserved memory
Use a device lifecycle managed action to release reserved memory. This helps prevent mistakes like releasing out of order in cleanup functions and forgetting to release on error paths. Signed-off-by: Andrew Davis --- drivers/remoteproc/ti_k3_dsp_remoteproc.c | 20 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/remoteproc/ti_k3_dsp_remoteproc.c b/drivers/remoteproc/ti_k3_dsp_remoteproc.c index 93fbc89307d6a..0cb00146fe977 100644 --- a/drivers/remoteproc/ti_k3_dsp_remoteproc.c +++ b/drivers/remoteproc/ti_k3_dsp_remoteproc.c @@ -550,6 +550,13 @@ static int k3_dsp_rproc_of_get_memories(struct platform_device *pdev, return 0; } +static void k3_dsp_mem_release(void *data) +{ + struct device *dev = data; + + of_reserved_mem_device_release(dev); +} + static int k3_dsp_reserved_mem_init(struct k3_dsp_rproc *kproc) { struct device *dev = kproc->dev; @@ -579,13 +586,14 @@ static int k3_dsp_reserved_mem_init(struct k3_dsp_rproc *kproc) ERR_PTR(ret)); return ret; } + ret = devm_add_action_or_reset(dev, k3_dsp_mem_release, dev); + if (ret) + return ret; num_rmems--; kproc->rmem = kcalloc(num_rmems, sizeof(*kproc->rmem), GFP_KERNEL); - if (!kproc->rmem) { - ret = -ENOMEM; - goto release_rmem; - } + if (!kproc->rmem) + return -ENOMEM; /* use remaining reserved memory regions for static carveouts */ for (i = 0; i < num_rmems; i++) { @@ -628,8 +636,6 @@ static int k3_dsp_reserved_mem_init(struct k3_dsp_rproc *kproc) for (i--; i >= 0; i--) iounmap(kproc->rmem[i].cpu_addr); kfree(kproc->rmem); -release_rmem: - of_reserved_mem_device_release(kproc->dev); return ret; } @@ -640,8 +646,6 @@ static void k3_dsp_reserved_mem_exit(struct k3_dsp_rproc *kproc) for (i = 0; i < kproc->num_rmems; i++) iounmap(kproc->rmem[i].cpu_addr); kfree(kproc->rmem); - - of_reserved_mem_device_release(kproc->dev); } static -- 2.39.2
[PATCH 3/8] remoteproc: k3-dsp: Use devm_kcalloc() helper
Use a device lifecycle managed action to free memory. This helps prevent mistakes like freeing out of order in cleanup functions and forgetting to free on error paths. Signed-off-by: Andrew Davis --- drivers/remoteproc/ti_k3_dsp_remoteproc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/remoteproc/ti_k3_dsp_remoteproc.c b/drivers/remoteproc/ti_k3_dsp_remoteproc.c index 0cb00146fe977..a13552c71f440 100644 --- a/drivers/remoteproc/ti_k3_dsp_remoteproc.c +++ b/drivers/remoteproc/ti_k3_dsp_remoteproc.c @@ -591,7 +591,7 @@ static int k3_dsp_reserved_mem_init(struct k3_dsp_rproc *kproc) return ret; num_rmems--; - kproc->rmem = kcalloc(num_rmems, sizeof(*kproc->rmem), GFP_KERNEL); + kproc->rmem = devm_kcalloc(dev, num_rmems, sizeof(*kproc->rmem), GFP_KERNEL); if (!kproc->rmem) return -ENOMEM; @@ -635,7 +635,6 @@ static int k3_dsp_reserved_mem_init(struct k3_dsp_rproc *kproc) unmap_rmem: for (i--; i >= 0; i--) iounmap(kproc->rmem[i].cpu_addr); - kfree(kproc->rmem); return ret; } @@ -645,7 +644,6 @@ static void k3_dsp_reserved_mem_exit(struct k3_dsp_rproc *kproc) for (i = 0; i < kproc->num_rmems; i++) iounmap(kproc->rmem[i].cpu_addr); - kfree(kproc->rmem); } static -- 2.39.2
[PATCH 4/8] remoteproc: k3-dsp: Use devm_ti_sci_get_by_phandle() helper
Use the device lifecycle managed TI-SCI get() function. This helps prevent mistakes like not put()'ing in the wrong order in cleanup functions and forgetting to put() on error paths. Signed-off-by: Andrew Davis --- drivers/remoteproc/ti_k3_dsp_remoteproc.c | 33 +++ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/drivers/remoteproc/ti_k3_dsp_remoteproc.c b/drivers/remoteproc/ti_k3_dsp_remoteproc.c index a13552c71f440..0240340a83e90 100644 --- a/drivers/remoteproc/ti_k3_dsp_remoteproc.c +++ b/drivers/remoteproc/ti_k3_dsp_remoteproc.c @@ -682,7 +682,6 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev) const char *fw_name; bool p_state = false; int ret = 0; - int ret1; data = of_device_get_match_data(dev); if (!data) @@ -708,30 +707,24 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev) kproc->dev = dev; kproc->data = data; - kproc->ti_sci = ti_sci_get_by_phandle(np, "ti,sci"); + kproc->ti_sci = devm_ti_sci_get_by_phandle(dev, "ti,sci"); if (IS_ERR(kproc->ti_sci)) return dev_err_probe(dev, PTR_ERR(kproc->ti_sci), "failed to get ti-sci handle\n"); ret = of_property_read_u32(np, "ti,sci-dev-id", &kproc->ti_sci_id); - if (ret) { - dev_err_probe(dev, ret, "missing 'ti,sci-dev-id' property\n"); - goto put_sci; - } + if (ret) + return dev_err_probe(dev, ret, "missing 'ti,sci-dev-id' property\n"); kproc->reset = devm_reset_control_get_exclusive(dev, NULL); - if (IS_ERR(kproc->reset)) { - ret = dev_err_probe(dev, PTR_ERR(kproc->reset), - "failed to get reset\n"); - goto put_sci; - } + if (IS_ERR(kproc->reset)) + return dev_err_probe(dev, PTR_ERR(kproc->reset), +"failed to get reset\n"); kproc->tsp = k3_dsp_rproc_of_get_tsp(dev, kproc->ti_sci); - if (IS_ERR(kproc->tsp)) { - ret = dev_err_probe(dev, PTR_ERR(kproc->tsp), - "failed to construct ti-sci proc control\n"); - goto put_sci; - } + if (IS_ERR(kproc->tsp)) + return dev_err_probe(dev, PTR_ERR(kproc->tsp), +"failed to construct ti-sci proc control\n"); ret = ti_sci_proc_request(kproc->tsp); if (ret < 0) { @@ -805,10 +798,6 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev) dev_err(dev, "failed to release proc (%pe)\n", ERR_PTR(ret1)); free_tsp: kfree(kproc->tsp); -put_sci: - ret1 = ti_sci_put_handle(kproc->ti_sci); - if (ret1) - dev_err(dev, "failed to put ti_sci handle (%pe)\n", ERR_PTR(ret1)); return ret; } @@ -836,10 +825,6 @@ static void k3_dsp_rproc_remove(struct platform_device *pdev) kfree(kproc->tsp); - ret = ti_sci_put_handle(kproc->ti_sci); - if (ret) - dev_err(dev, "failed to put ti_sci handle (%pe)\n", ERR_PTR(ret)); - k3_dsp_reserved_mem_exit(kproc); } -- 2.39.2
[PATCH 7/8] remoteproc: k3-dsp: Use devm_ioremap_wc() helper
Use a device lifecycle managed ioremap helper function. This helps prevent mistakes like unmapping out of order in cleanup functions and forgetting to unmap on all error paths. Signed-off-by: Andrew Davis --- drivers/remoteproc/ti_k3_dsp_remoteproc.c | 48 +-- 1 file changed, 10 insertions(+), 38 deletions(-) diff --git a/drivers/remoteproc/ti_k3_dsp_remoteproc.c b/drivers/remoteproc/ti_k3_dsp_remoteproc.c index 800c8c6767086..f799f74734b4a 100644 --- a/drivers/remoteproc/ti_k3_dsp_remoteproc.c +++ b/drivers/remoteproc/ti_k3_dsp_remoteproc.c @@ -598,16 +598,13 @@ static int k3_dsp_reserved_mem_init(struct k3_dsp_rproc *kproc) /* use remaining reserved memory regions for static carveouts */ for (i = 0; i < num_rmems; i++) { rmem_np = of_parse_phandle(np, "memory-region", i + 1); - if (!rmem_np) { - ret = -EINVAL; - goto unmap_rmem; - } + if (!rmem_np) + return -EINVAL; rmem = of_reserved_mem_lookup(rmem_np); if (!rmem) { of_node_put(rmem_np); - ret = -EINVAL; - goto unmap_rmem; + return -EINVAL; } of_node_put(rmem_np); @@ -615,12 +612,11 @@ static int k3_dsp_reserved_mem_init(struct k3_dsp_rproc *kproc) /* 64-bit address regions currently not supported */ kproc->rmem[i].dev_addr = (u32)rmem->base; kproc->rmem[i].size = rmem->size; - kproc->rmem[i].cpu_addr = ioremap_wc(rmem->base, rmem->size); + kproc->rmem[i].cpu_addr = devm_ioremap_wc(dev, rmem->base, rmem->size); if (!kproc->rmem[i].cpu_addr) { dev_err(dev, "failed to map reserved memory#%d at %pa of size %pa\n", i + 1, &rmem->base, &rmem->size); - ret = -ENOMEM; - goto unmap_rmem; + return -ENOMEM; } dev_dbg(dev, "reserved memory%d: bus addr %pa size 0x%zx va %pK da 0x%x\n", @@ -631,19 +627,6 @@ static int k3_dsp_reserved_mem_init(struct k3_dsp_rproc *kproc) kproc->num_rmems = num_rmems; return 0; - -unmap_rmem: - for (i--; i >= 0; i--) - iounmap(kproc->rmem[i].cpu_addr); - return ret; -} - -static void k3_dsp_reserved_mem_exit(struct k3_dsp_rproc *kproc) -{ - int i; - - for (i = 0; i < kproc->num_rmems; i++) - iounmap(kproc->rmem[i].cpu_addr); } static void k3_dsp_release_tsp(void *data) @@ -752,10 +735,8 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev) ret = kproc->ti_sci->ops.dev_ops.is_on(kproc->ti_sci, kproc->ti_sci_id, NULL, &p_state); - if (ret) { - dev_err_probe(dev, ret, "failed to get initial state, mode cannot be determined\n"); - goto release_mem; - } + if (ret) + return dev_err_probe(dev, ret, "failed to get initial state, mode cannot be determined\n"); /* configure J721E devices for either remoteproc or IPC-only mode */ if (p_state) { @@ -779,8 +760,7 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev) if (data->uses_lreset) { ret = reset_control_status(kproc->reset); if (ret < 0) { - dev_err_probe(dev, ret, "failed to get reset status\n"); - goto release_mem; + return dev_err_probe(dev, ret, "failed to get reset status\n"); } else if (ret == 0) { dev_warn(dev, "local reset is deasserted for device\n"); k3_dsp_rproc_reset(kproc); @@ -789,18 +769,12 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev) } ret = rproc_add(rproc); - if (ret) { - dev_err_probe(dev, ret, "failed to add register device with remoteproc core\n"); - goto release_mem; - } + if (ret) + return dev_err_probe(dev, ret, "failed to add register device with remoteproc core\n"); platform_set_drvdata(pdev, kproc); return 0; - -release_mem: - k3_dsp_reserved_mem_exit(kproc); - return ret; } static void k3_dsp_rproc_remove(struct platform_device *pdev) @@ -820,8 +794,6 @@ static void k3_dsp_rproc_remove(struct platform_device *pdev) } rproc_del(kproc->rproc); - - k3_dsp_reserved_mem_exit(kproc); } static const struct k3_dsp_mem_data c66_mems[] = { -- 2.39.2
[PATCH 6/8] remoteproc: k3-dsp: Add devm action to release tsp
Use a device lifecycle managed action to release tps ti_sci_proc handle. This helps prevent mistakes like releasing out of order in cleanup functions and forgetting to release on error paths. Signed-off-by: Andrew Davis --- drivers/remoteproc/ti_k3_dsp_remoteproc.c | 26 +++ 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/remoteproc/ti_k3_dsp_remoteproc.c b/drivers/remoteproc/ti_k3_dsp_remoteproc.c index 2aac25d013985..800c8c6767086 100644 --- a/drivers/remoteproc/ti_k3_dsp_remoteproc.c +++ b/drivers/remoteproc/ti_k3_dsp_remoteproc.c @@ -646,6 +646,13 @@ static void k3_dsp_reserved_mem_exit(struct k3_dsp_rproc *kproc) iounmap(kproc->rmem[i].cpu_addr); } +static void k3_dsp_release_tsp(void *data) +{ + struct ti_sci_proc *tsp = data; + + ti_sci_proc_release(tsp); +} + static struct ti_sci_proc *k3_dsp_rproc_of_get_tsp(struct device *dev, const struct ti_sci_handle *sci) @@ -731,16 +738,17 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev) dev_err_probe(dev, ret, "ti_sci_proc_request failed\n"); return ret; } + ret = devm_add_action_or_reset(dev, k3_dsp_release_tsp, kproc->tsp); + if (ret) + return ret; ret = k3_dsp_rproc_of_get_memories(pdev, kproc); if (ret) - goto release_tsp; + return ret; ret = k3_dsp_reserved_mem_init(kproc); - if (ret) { - dev_err_probe(dev, ret, "reserved memory init failed\n"); - goto release_tsp; - } + if (ret) + return dev_err_probe(dev, ret, "reserved memory init failed\n"); ret = kproc->ti_sci->ops.dev_ops.is_on(kproc->ti_sci, kproc->ti_sci_id, NULL, &p_state); @@ -792,10 +800,6 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev) release_mem: k3_dsp_reserved_mem_exit(kproc); -release_tsp: - ret1 = ti_sci_proc_release(kproc->tsp); - if (ret1) - dev_err(dev, "failed to release proc (%pe)\n", ERR_PTR(ret1)); return ret; } @@ -817,10 +821,6 @@ static void k3_dsp_rproc_remove(struct platform_device *pdev) rproc_del(kproc->rproc); - ret = ti_sci_proc_release(kproc->tsp); - if (ret) - dev_err(dev, "failed to release proc (%pe)\n", ERR_PTR(ret)); - k3_dsp_reserved_mem_exit(kproc); } -- 2.39.2
[PATCH 8/8] remoteproc: k3-dsp: Use devm_rproc_add() helper
Use device lifecycle managed devm_rproc_add() helper function. This helps prevent mistakes like deleting out of order in cleanup functions and forgetting to delete on all error paths. Signed-off-by: Andrew Davis --- drivers/remoteproc/ti_k3_dsp_remoteproc.c | 9 ++--- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/remoteproc/ti_k3_dsp_remoteproc.c b/drivers/remoteproc/ti_k3_dsp_remoteproc.c index f799f74734b4a..3555b535b1683 100644 --- a/drivers/remoteproc/ti_k3_dsp_remoteproc.c +++ b/drivers/remoteproc/ti_k3_dsp_remoteproc.c @@ -768,7 +768,7 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev) } } - ret = rproc_add(rproc); + ret = devm_rproc_add(dev, rproc); if (ret) return dev_err_probe(dev, ret, "failed to add register device with remoteproc core\n"); @@ -786,14 +786,9 @@ static void k3_dsp_rproc_remove(struct platform_device *pdev) if (rproc->state == RPROC_ATTACHED) { ret = rproc_detach(rproc); - if (ret) { - /* Note this error path leaks resources */ + if (ret) dev_err(dev, "failed to detach proc (%pe)\n", ERR_PTR(ret)); - return; - } } - - rproc_del(kproc->rproc); } static const struct k3_dsp_mem_data c66_mems[] = { -- 2.39.2
[PATCH 5/8] remoteproc: k3-dsp: Use devm_kzalloc() helper
Use device lifecycle managed devm_kzalloc() helper function. This helps prevent mistakes like freeing out of order in cleanup functions and forgetting to free on all error paths. Signed-off-by: Andrew Davis --- drivers/remoteproc/ti_k3_dsp_remoteproc.c | 8 ++-- 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/remoteproc/ti_k3_dsp_remoteproc.c b/drivers/remoteproc/ti_k3_dsp_remoteproc.c index 0240340a83e90..2aac25d013985 100644 --- a/drivers/remoteproc/ti_k3_dsp_remoteproc.c +++ b/drivers/remoteproc/ti_k3_dsp_remoteproc.c @@ -659,7 +659,7 @@ struct ti_sci_proc *k3_dsp_rproc_of_get_tsp(struct device *dev, if (ret < 0) return ERR_PTR(ret); - tsp = kzalloc(sizeof(*tsp), GFP_KERNEL); + tsp = devm_kzalloc(dev, sizeof(*tsp), GFP_KERNEL); if (!tsp) return ERR_PTR(-ENOMEM); @@ -729,7 +729,7 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev) ret = ti_sci_proc_request(kproc->tsp); if (ret < 0) { dev_err_probe(dev, ret, "ti_sci_proc_request failed\n"); - goto free_tsp; + return ret; } ret = k3_dsp_rproc_of_get_memories(pdev, kproc); @@ -796,8 +796,6 @@ static int k3_dsp_rproc_probe(struct platform_device *pdev) ret1 = ti_sci_proc_release(kproc->tsp); if (ret1) dev_err(dev, "failed to release proc (%pe)\n", ERR_PTR(ret1)); -free_tsp: - kfree(kproc->tsp); return ret; } @@ -823,8 +821,6 @@ static void k3_dsp_rproc_remove(struct platform_device *pdev) if (ret) dev_err(dev, "failed to release proc (%pe)\n", ERR_PTR(ret)); - kfree(kproc->tsp); - k3_dsp_reserved_mem_exit(kproc); } -- 2.39.2
Re: [PATCH 0/2] remoteproc: stm32: Fix sparse warnings
On Wed, Jan 17, 2024 at 02:53:10PM +0100, Arnaud Pouliquen wrote: > Fix warnings reported by sparse using make option "C=1" > > Arnaud Pouliquen (2): > remoteproc: stm32: Fix incorrect type in assignment for va > remoteproc: stm32: Fix incorrect type assignment returned by > stm32_rproc_get_loaded_rsc_tablef > > drivers/remoteproc/stm32_rproc.c | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > I have applied this set. Thanks, Mathieu > > base-commit: 0dd3ee31125508cd67f7e7172247f05b7fd1753a > -- > 2.25.1 >
[PATCH 3/3] arm64: dts: qcom: msm8953: add reset for display subsystem
From: Vladimir Lypak With this reset we can avoid situations like IRQ storms from DSI host before it even started probing (because boot-loader left DSI IRQs on). Signed-off-by: Vladimir Lypak Signed-off-by: Luca Weiss --- arch/arm64/boot/dts/qcom/msm8953.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/msm8953.dtsi b/arch/arm64/boot/dts/qcom/msm8953.dtsi index ad2f8cf9c966..dcb5c98b793c 100644 --- a/arch/arm64/boot/dts/qcom/msm8953.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8953.dtsi @@ -859,6 +859,8 @@ mdss: display-subsystem@1a0 { "vsync", "core"; + resets = <&gcc GCC_MDSS_BCR>; + #address-cells = <1>; #size-cells = <1>; ranges; -- 2.43.0
[PATCH 2/3] clk: qcom: gcc-msm8953: add MDSS_BCR reset
From: Vladimir Lypak Add an entry in the gcc driver for the MDSS_BCR reset found on MSM8953. Signed-off-by: Vladimir Lypak [luca: expand commit message, move entry] Signed-off-by: Luca Weiss --- drivers/clk/qcom/gcc-msm8953.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/qcom/gcc-msm8953.c b/drivers/clk/qcom/gcc-msm8953.c index 3e5a8cb14d4d..5725857faae6 100644 --- a/drivers/clk/qcom/gcc-msm8953.c +++ b/drivers/clk/qcom/gcc-msm8953.c @@ -4171,6 +4171,7 @@ static const struct qcom_reset_map gcc_msm8953_resets[] = { [GCC_USB3PHY_PHY_BCR] = { 0x3f03c }, [GCC_USB3_PHY_BCR] = { 0x3f034 }, [GCC_USB_30_BCR]= { 0x3f070 }, + [GCC_MDSS_BCR] = { 0x4d074 }, }; static const struct regmap_config gcc_msm8953_regmap_config = { -- 2.43.0
[PATCH 1/3] dt-bindings: clock: gcc-msm8953: add reset for MDSS subsystem
From: Vladimir Lypak Add a new define for the GCC_MDSS_BCR found on MSM8953. Signed-off-by: Vladimir Lypak [luca: expand commit message] Signed-off-by: Luca Weiss --- include/dt-bindings/clock/qcom,gcc-msm8953.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/dt-bindings/clock/qcom,gcc-msm8953.h b/include/dt-bindings/clock/qcom,gcc-msm8953.h index 783162da6148..28090f9d7f24 100644 --- a/include/dt-bindings/clock/qcom,gcc-msm8953.h +++ b/include/dt-bindings/clock/qcom,gcc-msm8953.h @@ -218,6 +218,7 @@ #define GCC_USB3PHY_PHY_BCR3 #define GCC_USB3_PHY_BCR 4 #define GCC_USB_30_BCR 5 +#define GCC_MDSS_BCR 6 /* GDSCs */ #define CPP_GDSC 0 -- 2.43.0
[PATCH 0/3] Add MDSS_BCR reset for MSM8953
Add the MDSS_BCR reset that is found in the GCC of MSM8953 so we can make sure the MDSS gets properly reset before Linux starts using it. Signed-off-by: Luca Weiss --- Vladimir Lypak (3): dt-bindings: clock: gcc-msm8953: add reset for MDSS subsystem clk: qcom: gcc-msm8953: add MDSS_BCR reset arm64: dts: qcom: msm8953: add reset for display subsystem arch/arm64/boot/dts/qcom/msm8953.dtsi| 2 ++ drivers/clk/qcom/gcc-msm8953.c | 1 + include/dt-bindings/clock/qcom,gcc-msm8953.h | 1 + 3 files changed, 4 insertions(+) --- base-commit: 6613476e225e090cc9aad49be7fa504e290dd33d change-id: 20240123-msm8953-mdss-reset-68308a03fff5 Best regards, -- Luca Weiss
[PATCH 0/4] tracing/user_events: Introduce multi-format events
Currently user_events supports 1 event with the same name and must have the exact same format when referenced by multiple programs. This opens an opportunity for malicous or poorly thought through programs to create events that others use with different formats. Another scenario is user programs wishing to use the same event name but add more fields later when the software updates. Various versions of a program may be running side-by-side, which is prevented by the current single format requirement. Add a new register flag (USER_EVENT_REG_MULTI_FORMAT) which indicates the user program wishes to use the same user_event name, but may have several different formats of the event in the future. When this flag is used, create the underlying tracepoint backing the user_event with a unique name per-version of the format. It's important that existing ABI users do not get this logic automatically, even if one of the multi format events matches the format. This ensures existing programs that create events and assume the tracepoint name will match exactly continue to work as expected. Add logic to only check multi-format events with other multi-format events and single-format events to only check single-format events during find. Add a register_name (reg_name) to the user_event struct which allows for split naming of events. We now have the name that was used to register within user_events as well as the unique name for the tracepoint. Upon registering events ensure matches based on first the reg_name, followed by the fields and format of the event. This allows for multiple events with the same registered name to have different formats. The underlying tracepoint will have a unique name in the format of {reg_name}:[unique_id]. The unique_id is the time, in nanoseconds, of the event creation converted to hex. Since this is done under the register mutex, it is extremely unlikely for these IDs to ever match. It's also very unlikely a malicious program could consistently guess what the name would be and attempt to squat on it via the single format ABI. For example, if both "test u32 value" and "test u64 value" are used with the USER_EVENT_REG_MULTI_FORMAT the system would have 2 unique tracepoints. The dynamic_events file would then show the following: u:test u64 count u:test u32 count The actual tracepoint names look like this: test:[d5874fdac44] test:[d5914662cd4] Deleting events via "!u:test u64 count" would only delete the first tracepoint that matched that format. When the delete ABI is used all events with the same name will be attempted to be deleted. If per-version deletion is required, user programs should either not use persistent events or delete them via dynamic_events. Beau Belgrave (4): tracing/user_events: Prepare find/delete for same name events tracing/user_events: Introduce multi-format events selftests/user_events: Test multi-format events tracing/user_events: Document multi-format flag Documentation/trace/user_events.rst | 23 +- include/uapi/linux/user_events.h | 6 +- kernel/trace/trace_events_user.c | 224 +- .../testing/selftests/user_events/abi_test.c | 134 +++ 4 files changed, 325 insertions(+), 62 deletions(-) base-commit: 610a9b8f49fbcf1100716370d3b5f6f884a2835a -- 2.34.1
[PATCH 1/4] tracing/user_events: Prepare find/delete for same name events
The current code for finding and deleting events assumes that there will never be cases when user_events are registered with the same name, but different formats. In the future this scenario will exist to ensure user programs can be updated or modify their events and run different versions of their programs side-by-side without being blocked. This change does not yet allow for multi-format events. If user_events are registered with the same name but different arguments the programs see the same return values as before. This change simply makes it possible to easily accomodate for this in future changes. Update find_user_event() to take in argument parameters and register flags to accomodate future multi-format event scenarios. Have find validate argument matching and return error pointers to cover address in use cases, or allocation errors. Update callers to handle error pointer logic. Move delete_user_event() to use hash walking directly now that find has changed. Delete all events found that match the register name, stop if an error occurs and report back to the user. Update user_fields_match() to cover list_empty() scenarios instead of each callsite doing it now that find_user_event() uses it directly. Signed-off-by: Beau Belgrave --- kernel/trace/trace_events_user.c | 106 +-- 1 file changed, 58 insertions(+), 48 deletions(-) diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 9365ce407426..0480579ba563 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -202,6 +202,8 @@ static struct user_event_mm *user_event_mm_get(struct user_event_mm *mm); static struct user_event_mm *user_event_mm_get_all(struct user_event *user); static void user_event_mm_put(struct user_event_mm *mm); static int destroy_user_event(struct user_event *user); +static bool user_fields_match(struct user_event *user, int argc, + const char **argv); static u32 user_event_key(char *name) { @@ -1493,17 +1495,24 @@ static int destroy_user_event(struct user_event *user) } static struct user_event *find_user_event(struct user_event_group *group, - char *name, u32 *outkey) + char *name, int argc, const char **argv, + u32 flags, u32 *outkey) { struct user_event *user; u32 key = user_event_key(name); *outkey = key; - hash_for_each_possible(group->register_table, user, node, key) - if (!strcmp(EVENT_NAME(user), name)) + hash_for_each_possible(group->register_table, user, node, key) { + if (strcmp(EVENT_NAME(user), name)) + continue; + + if (user_fields_match(user, argc, argv)) return user_event_get(user); + return ERR_PTR(-EADDRINUSE); + } + return NULL; } @@ -1860,6 +1869,9 @@ static bool user_fields_match(struct user_event *user, int argc, struct list_head *head = &user->fields; int i = 0; + if (argc == 0) + return list_empty(head); + list_for_each_entry_reverse(field, head, link) { if (!user_field_match(field, argc, argv, &i)) return false; @@ -1880,10 +1892,8 @@ static bool user_event_match(const char *system, const char *event, match = strcmp(EVENT_NAME(user), event) == 0 && (!system || strcmp(system, USER_EVENTS_SYSTEM) == 0); - if (match && argc > 0) + if (match) match = user_fields_match(user, argc, argv); - else if (match && argc == 0) - match = list_empty(&user->fields); return match; } @@ -1922,11 +1932,11 @@ static int user_event_parse(struct user_event_group *group, char *name, char *args, char *flags, struct user_event **newuser, int reg_flags) { - int ret; - u32 key; struct user_event *user; + char **argv = NULL; int argc = 0; - char **argv; + int ret; + u32 key; /* Currently don't support any text based flags */ if (flags != NULL) @@ -1935,41 +1945,34 @@ static int user_event_parse(struct user_event_group *group, char *name, if (!user_event_capable(reg_flags)) return -EPERM; + if (args) { + argv = argv_split(GFP_KERNEL, args, &argc); + + if (!argv) + return -ENOMEM; + } + /* Prevent dyn_event from racing */ mutex_lock(&event_mutex); - user = find_user_event(group, name, &key); + user = find_user_event(group, name, argc, (const char **)argv, + reg_flags, &key); mutex_unlock(&event_mutex); - if (user) { - if (args) { -
[PATCH 4/4] tracing/user_events: Document multi-format flag
User programs can now ask user_events to handle the synchronization of multiple different formats for an event with the same name via the new USER_EVENT_REG_MULTI_FORMAT flag. Add a section for USER_EVENT_REG_MULTI_FORMAT that explains the intended purpose and caveats of using it. Explain how deletion works in these cases and how to use /sys/kernel/tracing/dynamic_events for per-version deletion. Signed-off-by: Beau Belgrave --- Documentation/trace/user_events.rst | 23 ++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/Documentation/trace/user_events.rst b/Documentation/trace/user_events.rst index d8f12442aaa6..35a2524bc73c 100644 --- a/Documentation/trace/user_events.rst +++ b/Documentation/trace/user_events.rst @@ -92,6 +92,20 @@ The following flags are currently supported. process closes or unregisters the event. Requires CAP_PERFMON otherwise -EPERM is returned. ++ USER_EVENT_REG_MULTI_FORMAT: The event can contain multiple formats. This + allows programs to prevent themselves from being blocked when their event + format changes and they wish to use the same name. When this flag is used the + tracepoint name will be in the new format of "name:[unique_id]" vs the older + format of "name". A tracepoint will be created for each unique pair of name + and format. This means if several processes use the same name and format, + they will use the same tracepoint. If yet another process uses the same name, + but a different format than the other processes, it will use a different + tracepoint with a new unique id. Recording programs need to scan tracefs for + the various different formats of the event name they are interested in + recording. The system name of the tracepoint will also use "user_events_multi" + instead of "user_events". This prevents single-format event names conflicting + with any multi-format event names within tracefs. + Upon successful registration the following is set. + write_index: The index to use for this file descriptor that represents this @@ -106,6 +120,9 @@ or perf record -e user_events:[name] when attaching/recording. **NOTE:** The event subsystem name by default is "user_events". Callers should not assume it will always be "user_events". Operators reserve the right in the future to change the subsystem name per-process to accommodate event isolation. +In addition if the USER_EVENT_REG_MULTI_FORMAT flag is used the tracepoint name +will have a unique id appended to it and the system name will be +"user_events_multi" as described above. Command Format ^^ @@ -156,7 +173,11 @@ to request deletes than the one used for registration due to this. to the event. If programs do not want auto-delete, they must use the USER_EVENT_REG_PERSIST flag when registering the event. Once that flag is used the event exists until DIAG_IOCSDEL is invoked. Both register and delete of an -event that persists requires CAP_PERFMON, otherwise -EPERM is returned. +event that persists requires CAP_PERFMON, otherwise -EPERM is returned. When +there are multiple formats of the same event name, all events with the same +name will be attempted to be deleted. If only a specific version is wanted to +be deleted then the /sys/kernel/tracing/dynamic_events file should be used for +that specific format of the event. Unregistering - -- 2.34.1
[PATCH 3/4] selftests/user_events: Test multi-format events
User_events now has multi-format events which allow for the same register name, but with different formats. When this occurs, different tracepoints are created with unique names. Add a new test that ensures the same name can be used for two different formats. Ensure they are isolated from each other and that name and arg matching still works if yet another register comes in with the same format as one of the two. Signed-off-by: Beau Belgrave --- .../testing/selftests/user_events/abi_test.c | 134 ++ 1 file changed, 134 insertions(+) diff --git a/tools/testing/selftests/user_events/abi_test.c b/tools/testing/selftests/user_events/abi_test.c index cef1ff1af223..b3480fad65a5 100644 --- a/tools/testing/selftests/user_events/abi_test.c +++ b/tools/testing/selftests/user_events/abi_test.c @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include "../kselftest_harness.h" @@ -23,6 +25,62 @@ const char *data_file = "/sys/kernel/tracing/user_events_data"; const char *enable_file = "/sys/kernel/tracing/events/user_events/__abi_event/enable"; +const char *multi_dir_glob = "/sys/kernel/tracing/events/user_events_multi/__abi_event:*"; + +static int wait_for_delete(char *dir) +{ + struct stat buf; + int i; + + for (i = 0; i < 1; ++i) { + if (stat(dir, &buf) == -1 && errno == ENOENT) + return 0; + + usleep(1000); + } + + return -1; +} + +static int find_multi_event_dir(char *unique_field, char *out_dir, int dir_len) +{ + char path[256]; + glob_t buf; + int i, ret; + + ret = glob(multi_dir_glob, GLOB_ONLYDIR, NULL, &buf); + + if (ret) + return -1; + + ret = -1; + + for (i = 0; i < buf.gl_pathc; ++i) { + FILE *fp; + + snprintf(path, sizeof(path), "%s/format", buf.gl_pathv[i]); + fp = fopen(path, "r"); + + if (!fp) + continue; + + while (fgets(path, sizeof(path), fp) != NULL) { + if (strstr(path, unique_field)) { + fclose(fp); + /* strscpy is not available, use snprintf */ + snprintf(out_dir, dir_len, "%s", buf.gl_pathv[i]); + ret = 0; + goto out; + } + } + + fclose(fp); + } +out: + globfree(&buf); + + return ret; +} static bool event_exists(void) { @@ -74,6 +132,39 @@ static int event_delete(void) return ret; } +static int reg_enable_multi(void *enable, int size, int bit, int flags, + char *args) +{ + struct user_reg reg = {0}; + char full_args[512] = {0}; + int fd = open(data_file, O_RDWR); + int len; + int ret; + + if (fd < 0) + return -1; + + len = snprintf(full_args, sizeof(full_args), "__abi_event %s", args); + + if (len > sizeof(full_args)) { + ret = -E2BIG; + goto out; + } + + reg.size = sizeof(reg); + reg.name_args = (__u64)full_args; + reg.flags = USER_EVENT_REG_MULTI_FORMAT | flags; + reg.enable_bit = bit; + reg.enable_addr = (__u64)enable; + reg.enable_size = size; + + ret = ioctl(fd, DIAG_IOCSREG, ®); +out: + close(fd); + + return ret; +} + static int reg_enable_flags(void *enable, int size, int bit, int flags) { struct user_reg reg = {0}; @@ -207,6 +298,49 @@ TEST_F(user, bit_sizes) { ASSERT_NE(0, reg_enable(&self->check, 128, 0)); } +TEST_F(user, multi_format) { + char first_dir[256]; + char second_dir[256]; + struct stat buf; + + /* Multiple formats for the same name should work */ + ASSERT_EQ(0, reg_enable_multi(&self->check, sizeof(int), 0, + 0, "u32 multi_first")); + + ASSERT_EQ(0, reg_enable_multi(&self->check, sizeof(int), 1, + 0, "u64 multi_second")); + + /* Same name with same format should also work */ + ASSERT_EQ(0, reg_enable_multi(&self->check, sizeof(int), 2, + 0, "u64 multi_second")); + + ASSERT_EQ(0, find_multi_event_dir("multi_first", + first_dir, sizeof(first_dir))); + + ASSERT_EQ(0, find_multi_event_dir("multi_second", + second_dir, sizeof(second_dir))); + + /* Should not be found in the same dir */ + ASSERT_NE(0, strcmp(first_dir, second_dir)); + + /* First dir should still exist */ + ASSERT_EQ(0, stat(first_dir, &buf)); + + /* Disabling first register should remove first dir */ + ASSERT_EQ(0, reg_disable(&self->check, 0)); + ASSERT_EQ(0, wait_for_delete(first_dir)); +
[PATCH 2/4] tracing/user_events: Introduce multi-format events
Currently user_events supports 1 event with the same name and must have the exact same format when referenced by multiple programs. This opens an opportunity for malicous or poorly thought through programs to create events that others use with different formats. Another scenario is user programs wishing to use the same event name but add more fields later when the software updates. Various versions of a program may be running side-by-side, which is prevented by the current single format requirement. Add a new register flag (USER_EVENT_REG_MULTI_FORMAT) which indicates the user program wishes to use the same user_event name, but may have several different formats of the event in the future. When this flag is used, create the underlying tracepoint backing the user_event with a unique name per-version of the format. It's important that existing ABI users do not get this logic automatically, even if one of the multi format events matches the format. This ensures existing programs that create events and assume the tracepoint name will match exactly continue to work as expected. Add logic to only check multi-format events with other multi-format events and single-format events to only check single-format events during find. Change system name of the multi-format event tracepoint to ensure that multi-format events are isolated completely from single-format events. Add a register_name (reg_name) to the user_event struct which allows for split naming of events. We now have the name that was used to register within user_events as well as the unique name for the tracepoint. Upon registering events ensure matches based on first the reg_name, followed by the fields and format of the event. This allows for multiple events with the same registered name to have different formats. The underlying tracepoint will have a unique name in the format of {reg_name}:[unique_id]. For example, if both "test u32 value" and "test u64 value" are used with the USER_EVENT_REG_MULTI_FORMAT the system would have 2 unique tracepoints. The dynamic_events file would then show the following: u:test u64 count u:test u32 count The actual tracepoint names look like this: test:[d5874fdac44] test:[d5914662cd4] Both would be under the new user_events_multi system name to prevent the older ABI from being used to squat on multi-formatted events and block their use. Deleting events via "!u:test u64 count" would only delete the first tracepoint that matched that format. When the delete ABI is used all events with the same name will be attempted to be deleted. If per-version deletion is required, user programs should either not use persistent events or delete them via dynamic_events. Signed-off-by: Beau Belgrave --- include/uapi/linux/user_events.h | 6 +- kernel/trace/trace_events_user.c | 118 +++ 2 files changed, 111 insertions(+), 13 deletions(-) diff --git a/include/uapi/linux/user_events.h b/include/uapi/linux/user_events.h index f74f3aedd49c..a03de03dccbc 100644 --- a/include/uapi/linux/user_events.h +++ b/include/uapi/linux/user_events.h @@ -12,6 +12,7 @@ #include #define USER_EVENTS_SYSTEM "user_events" +#define USER_EVENTS_MULTI_SYSTEM "user_events_multi" #define USER_EVENTS_PREFIX "u:" /* Create dynamic location entry within a 32-bit value */ @@ -22,8 +23,11 @@ enum user_reg_flag { /* Event will not delete upon last reference closing */ USER_EVENT_REG_PERSIST = 1U << 0, + /* Event will be allowed to have multiple formats */ + USER_EVENT_REG_MULTI_FORMAT = 1U << 1, + /* This value or above is currently non-ABI */ - USER_EVENT_REG_MAX = 1U << 1, + USER_EVENT_REG_MAX = 1U << 2, }; /* diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 0480579ba563..f9c0781285b6 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -34,7 +34,8 @@ /* Limit how long of an event name plus args within the subsystem. */ #define MAX_EVENT_DESC 512 -#define EVENT_NAME(user_event) ((user_event)->tracepoint.name) +#define EVENT_NAME(user_event) ((user_event)->reg_name) +#define EVENT_TP_NAME(user_event) ((user_event)->tracepoint.name) #define MAX_FIELD_ARRAY_SIZE 1024 /* @@ -54,10 +55,13 @@ * allows isolation for events by various means. */ struct user_event_group { - char*system_name; - struct hlist_node node; - struct mutex reg_mutex; + char*system_name; + char*system_multi_name; + struct hlist_node node; + struct mutexreg_mutex; DECLARE_HASHTABLE(register_table, 8); + /* ID that moves forward within the group for multi-event names */ + u64 multi_id; }; /* Group for init_user_ns mapping, top-most group */ @@ -78,6 +82,7 @@ static unsigned int current_user_events; */ str
[PATCH] tracing: Include PPIN in mce_record tracepoint
Machine Check Error information from struct mce is exported to userspace through the mce_record tracepoint. Currently, however, the PPIN (Protected Processor Inventory Number) field of struct mce is not exported through the tracepoint. Export PPIN through the tracepoint as it may provide useful information for debug and analysis. Signed-off-by: Avadhut Naik --- include/trace/events/mce.h | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h index 1391ada0da3b..657b93ec8176 100644 --- a/include/trace/events/mce.h +++ b/include/trace/events/mce.h @@ -25,6 +25,7 @@ TRACE_EVENT(mce_record, __field(u64,ipid) __field(u64,ip ) __field(u64,tsc ) + __field(u64,ppin) __field(u64,walltime) __field(u32,cpu ) __field(u32,cpuid ) @@ -45,6 +46,7 @@ TRACE_EVENT(mce_record, __entry->ipid = m->ipid; __entry->ip = m->ip; __entry->tsc= m->tsc; + __entry->ppin = m->ppin; __entry->walltime = m->time; __entry->cpu= m->extcpu; __entry->cpuid = m->cpuid; @@ -55,7 +57,7 @@ TRACE_EVENT(mce_record, __entry->cpuvendor = m->cpuvendor; ), - TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", + TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PPIN: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", __entry->cpu, __entry->mcgcap, __entry->mcgstatus, __entry->bank, __entry->status, @@ -63,6 +65,7 @@ TRACE_EVENT(mce_record, __entry->addr, __entry->misc, __entry->synd, __entry->cs, __entry->ip, __entry->tsc, + __entry->ppin, __entry->cpuvendor, __entry->cpuid, __entry->walltime, __entry->socketid, base-commit: 451b2bc29430fa147e36a48348f8b6b615fd6820 -- 2.34.1
Re: [PATCH] tracing: Include PPIN in mce_record tracepoint
On Tue, Jan 23, 2024 at 05:51:50PM -0600, Avadhut Naik wrote: > Machine Check Error information from struct mce is exported to userspace > through the mce_record tracepoint. > > Currently, however, the PPIN (Protected Processor Inventory Number) field > of struct mce is not exported through the tracepoint. > > Export PPIN through the tracepoint as it may provide useful information > for debug and analysis. Awesome. I've been meaning to update the tracepoint for ages, but it never gets to the top of the queue. But some questions: 1) Are tracepoints a user visible ABI? Adding a new field in the middle feels like it might be problematic. I asked this question many years ago and Steven Rostedt said there was some tracing library in the works that would make this OK for appplications using that library. 2) While you are adding to the tracepoint, should we batch up all the useful changes that have been made to "struct mce". I think the new fields that might be of use are: __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ __u64 ppin; /* Protected Processor Inventory Number */ __u32 microcode;/* Microcode revision */ > > Signed-off-by: Avadhut Naik > --- > include/trace/events/mce.h | 5 - > 1 file changed, 4 insertions(+), 1 deletion(-) > > diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h > index 1391ada0da3b..657b93ec8176 100644 > --- a/include/trace/events/mce.h > +++ b/include/trace/events/mce.h > @@ -25,6 +25,7 @@ TRACE_EVENT(mce_record, > __field(u64,ipid) > __field(u64,ip ) > __field(u64,tsc ) > + __field(u64,ppin) > __field(u64,walltime) > __field(u32,cpu ) > __field(u32,cpuid ) > @@ -45,6 +46,7 @@ TRACE_EVENT(mce_record, > __entry->ipid = m->ipid; > __entry->ip = m->ip; > __entry->tsc= m->tsc; > + __entry->ppin = m->ppin; > __entry->walltime = m->time; > __entry->cpu= m->extcpu; > __entry->cpuid = m->cpuid; > @@ -55,7 +57,7 @@ TRACE_EVENT(mce_record, > __entry->cpuvendor = m->cpuvendor; > ), ... rest of patch trimmed. -Tony
[PATCH] tracing: Include PPIN in mce_record tracepoint
Hi, On 1/23/2024 6:12 PM, Tony Luck wrote: > On Tue, Jan 23, 2024 at 05:51:50PM -0600, Avadhut Naik wrote: >> Machine Check Error information from struct mce is exported to userspace >> through the mce_record tracepoint. >> >> Currently, however, the PPIN (Protected Processor Inventory Number) field >> of struct mce is not exported through the tracepoint. >> >> Export PPIN through the tracepoint as it may provide useful information >> for debug and analysis. > > Awesome. I've been meaning to update the tracepoint for ages, but > it never gets to the top of the queue. > > But some questions: > > 1) Are tracepoints a user visible ABI? Adding a new field in the middle > feels like it might be problematic. I asked this question many years > ago and Steven Rostedt said there was some tracing library in the works > that would make this OK for appplications using that library. > I think they can be user visible through the "trace" and "trace_pipe" in /sys/kernel/debug/tracing. But you will have to enable the events you want to trace through /sys/kernel/debug/tracing/events//enable. AFAIK, this (adding field in the middle) shouldn't be problematic as we have the tracepoint format available in debugfs. For e.g. with this patch, the format is as follows: [root avadnaik]# cat /sys/kernel/debug/tracing/events/mce/mce_record/format name: mce_record ID: 113 format: field:unsigned short common_type; offset:0; size:2; signed:0; field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1; signed:0; field:int common_pid; offset:4; size:4; signed:1; field:u64 mcgcap; offset:8; size:8; signed:0; field:u64 mcgstatus;offset:16; size:8; signed:0; field:u64 status; offset:24; size:8; signed:0; field:u64 addr; offset:32; size:8; signed:0; field:u64 misc; offset:40; size:8; signed:0; field:u64 synd; offset:48; size:8; signed:0; field:u64 ipid; offset:56; size:8; signed:0; field:u64 ip; offset:64; size:8; signed:0; field:u64 tsc; offset:72; size:8; signed:0; field:u64 ppin; offset:80; size:8; signed:0; field:u64 walltime; offset:88; size:8; signed:0; field:u32 cpu; offset:96; size:4; signed:0; field:u32 cpuid;offset:100; size:4; signed:0; field:u32 apicid; offset:104; size:4; signed:0; field:u32 socketid; offset:108; size:4; signed:0; field:u8 cs;offset:112; size:1; signed:0; field:u8 bank; offset:113; size:1; signed:0; field:u8 cpuvendor; offset:114; size:1; signed:0; print fmt: "CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PPIN: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", REC->cpu, REC->mcgcap, REC->mcgstatus, REC->bank, REC->status, REC->ipid, REC->addr, REC->misc, REC->synd, REC->cs, REC->ip, REC->tsc, REC->ppin, REC->cpuvendor, REC->cpuid, REC->walltime, REC->socketid, REC->apicid Just quickly tried with rasdaemon and things seem to be okay. Also, not a cent percent sure, but the library you are mentioning of, I think its the libtraceevent library and IIUC, it utilizes the above tracepoint format. > 2) While you are adding to the tracepoint, should we batch up all > the useful changes that have been made to "struct mce". I think the > new fields that might be of use are: > > __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ > __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ > __u64 ppin; /* Protected Processor Inventory Number */ > __u32 microcode;/* Microcode revision */ > synd and ipid are already a part of mce_record tracepoint. (They too have been added in the middle). Will add the microcode field in the next version. >> >> Signed-off-by: Avadhut Naik >> --- >> include/trace/events/mce.h | 5 - >> 1 file changed, 4 insertions(+), 1 deletion(-) >> >> diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h >> index 1391ada0da3b..657b93ec8176 100644 >> --- a/include/trace/events/mce.h >> +++ b/include/trace/events/mce.h >> @@ -25,6 +25,7 @@ TRACE_EVENT(mce_record, >> __field(u64,ipid) >> __field(u64,ip ) >> __field(u64,tsc ) >> +__field(u64,ppin) >> __field(u64,walltime) >> __field(u32,cpu ) >> __field(u32,cpuid ) >> @@ -45,6 +46,7 @@ TRACE_EVENT(mce_record, >> __entry->ipid
Re: [PATCH v4 7/7] selftests/ftrace: add test cases for VFS type "%pd" and "%pD"
On Tue, 23 Jan 2024 17:21:39 +0800 Ye Bin wrote: > This patch adds test cases for new print format type "%pd/%pD".The test cases > test the following items: > 1. Test README if add "%pd/%pD" type; > 2. Test "%pd" type for dput(); > 3. Test "%pD" type for vfs_read(); > > Signed-off-by: Ye Bin > --- > .../ftrace/test.d/kprobe/kprobe_args_vfs.tc | 79 +++ > 1 file changed, 79 insertions(+) > create mode 100644 > tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc > > diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc > b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc > new file mode 100644 > index ..1d8edd294dd6 > --- /dev/null > +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc > @@ -0,0 +1,79 @@ > +#!/bin/sh > +# SPDX-License-Identifier: GPL-2.0 > +# description: Kprobe event VFS type argument > +# requires: kprobe_events > + > +case `uname -m` in > +x86_64) > + ARG1=%di > +;; > +i[3456]86) > + ARG1=%ax > +;; > +aarch64) > + ARG1=%x0 > +;; > +arm*) > + ARG1=%r0 > +;; > +ppc64*) > + ARG1=%r3 > +;; > +ppc*) > + ARG1=%r3 You can merge this ppc* and ppc64* cases :) > +;; > +s390*) > + ARG1=%r2 > +;; > +mips*) > + ARG1=%r4 > +;; > +loongarch*) > + ARG1=%r4 > +;; > +riscv*) > + ARG1=%a0 Anyway, I wonder why don't you use '$arg1' instead of these registers. Is there any reason? Thank you, > +;; > +*) > + echo "Please implement other architecture here" > + exit_untested > +esac > + > +: "Test argument %pd/%pD in README" > +grep -q "%pd/%pD" README > + > +: "Test argument %pd with name" > +echo "p:testprobe dput name=${ARG1}:%pd" > kprobe_events > +echo 1 > events/kprobes/testprobe/enable > +grep -q "1" events/kprobes/testprobe/enable > +echo 0 > events/kprobes/testprobe/enable > +grep "dput" trace | grep -q "enable" > +echo "" > kprobe_events > +echo "" > trace > + > +: "Test argument %pd without name" > +echo "p:testprobe dput ${ARG1}:%pd" > kprobe_events > +echo 1 > events/kprobes/testprobe/enable > +grep -q "1" events/kprobes/testprobe/enable > +echo 0 > events/kprobes/testprobe/enable > +grep "dput" trace | grep -q "enable" > +echo "" > kprobe_events > +echo "" > trace > + > +: "Test argument %pD with name" > +echo "p:testprobe vfs_read name=${ARG1}:%pD" > kprobe_events > +echo 1 > events/kprobes/testprobe/enable > +grep -q "1" events/kprobes/testprobe/enable > +echo 0 > events/kprobes/testprobe/enable > +grep "vfs_read" trace | grep -q "enable" > +echo "" > kprobe_events > +echo "" > trace > + > +: "Test argument %pD without name" > +echo "p:testprobe vfs_read ${ARG1}:%pD" > kprobe_events > +echo 1 > events/kprobes/testprobe/enable > +grep -q "1" events/kprobes/testprobe/enable > +echo 0 > events/kprobes/testprobe/enable > +grep "vfs_read" trace | grep -q "enable" > +echo "" > kprobe_events > +echo "" > trace > -- > 2.31.1 > -- Masami Hiramatsu (Google)
Re: [PATCH] tracing: Include PPIN in mce_record tracepoint
On Tue, 23 Jan 2024 19:29:52 -0600 "Naik, Avadhut" wrote: > > But some questions: > > > > 1) Are tracepoints a user visible ABI? Adding a new field in the middle > > feels like it might be problematic. I asked this question many years > > ago and Steven Rostedt said there was some tracing library in the works > > that would make this OK for appplications using that library. > > > > I think they can be user visible through the "trace" and "trace_pipe" in > /sys/kernel/debug/tracing. But you will have to enable the events you want > to trace through /sys/kernel/debug/tracing/events//enable. > > AFAIK, this (adding field in the middle) shouldn't be problematic as we > have the tracepoint format available in debugfs. For e.g. with this patch, > the format is as follows: > > [root avadnaik]# cat /sys/kernel/debug/tracing/events/mce/mce_record/format > name: mce_record > ID: 113 > format: > field:unsigned short common_type; offset:0; size:2; > signed:0; > field:unsigned char common_flags; offset:2; size:1; > signed:0; > field:unsigned char common_preempt_count; offset:3; > size:1; signed:0; > field:int common_pid; offset:4; size:4; signed:1; > > field:u64 mcgcap; offset:8; size:8; signed:0; > field:u64 mcgstatus;offset:16; size:8; signed:0; > field:u64 status; offset:24; size:8; signed:0; > field:u64 addr; offset:32; size:8; signed:0; > field:u64 misc; offset:40; size:8; signed:0; > field:u64 synd; offset:48; size:8; signed:0; > field:u64 ipid; offset:56; size:8; signed:0; > field:u64 ip; offset:64; size:8; signed:0; > field:u64 tsc; offset:72; size:8; signed:0; > field:u64 ppin; offset:80; size:8; signed:0; > field:u64 walltime; offset:88; size:8; signed:0; > field:u32 cpu; offset:96; size:4; signed:0; > field:u32 cpuid;offset:100; size:4; signed:0; > field:u32 apicid; offset:104; size:4; signed:0; > field:u32 socketid; offset:108; size:4; signed:0; > field:u8 cs;offset:112; size:1; signed:0; > field:u8 bank; offset:113; size:1; signed:0; > field:u8 cpuvendor; offset:114; size:1; signed:0; > > print fmt: "CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, > ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PPIN: > %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", REC->cpu, > REC->mcgcap, REC->mcgstatus, REC->bank, REC->status, REC->ipid, REC->addr, > REC->misc, REC->synd, REC->cs, REC->ip, REC->tsc, REC->ppin, REC->cpuvendor, > REC->cpuid, REC->walltime, REC->socketid, REC->apicid > > > Just quickly tried with rasdaemon and things seem to be okay. > > Also, not a cent percent sure, but the library you are mentioning of, I think > its the libtraceevent library and IIUC, it utilizes the above tracepoint > format. > Yes, rasdaemon uses libtraceevent (or a copy of it internally) that reads the format file to find fields. You can safely add fields to the middle of the event structure and the parsing will be just fine. -- Steve
Re: [PATCH v4 7/7] selftests/ftrace: add test cases for VFS type "%pd" and "%pD"
On 2024/1/24 9:32, Masami Hiramatsu (Google) wrote: On Tue, 23 Jan 2024 17:21:39 +0800 Ye Bin wrote: This patch adds test cases for new print format type "%pd/%pD".The test cases test the following items: 1. Test README if add "%pd/%pD" type; 2. Test "%pd" type for dput(); 3. Test "%pD" type for vfs_read(); Signed-off-by: Ye Bin --- .../ftrace/test.d/kprobe/kprobe_args_vfs.tc | 79 +++ 1 file changed, 79 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc new file mode 100644 index ..1d8edd294dd6 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc @@ -0,0 +1,79 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Kprobe event VFS type argument +# requires: kprobe_events + +case `uname -m` in +x86_64) + ARG1=%di +;; +i[3456]86) + ARG1=%ax +;; +aarch64) + ARG1=%x0 +;; +arm*) + ARG1=%r0 +;; +ppc64*) + ARG1=%r3 +;; +ppc*) + ARG1=%r3 You can merge this ppc* and ppc64* cases :) +;; +s390*) + ARG1=%r2 +;; +mips*) + ARG1=%r4 +;; +loongarch*) + ARG1=%r4 +;; +riscv*) + ARG1=%a0 Anyway, I wonder why don't you use '$arg1' instead of these registers. Is there any reason? Thank you, Thank you for your advice. Actually, I wrote the test case by referring to " kprobe_args_string.tc". I'll modify it according to your suggestion. +;; +*) + echo "Please implement other architecture here" + exit_untested +esac + +: "Test argument %pd/%pD in README" +grep -q "%pd/%pD" README + +: "Test argument %pd with name" +echo "p:testprobe dput name=${ARG1}:%pd" > kprobe_events +echo 1 > events/kprobes/testprobe/enable +grep -q "1" events/kprobes/testprobe/enable +echo 0 > events/kprobes/testprobe/enable +grep "dput" trace | grep -q "enable" +echo "" > kprobe_events +echo "" > trace + +: "Test argument %pd without name" +echo "p:testprobe dput ${ARG1}:%pd" > kprobe_events +echo 1 > events/kprobes/testprobe/enable +grep -q "1" events/kprobes/testprobe/enable +echo 0 > events/kprobes/testprobe/enable +grep "dput" trace | grep -q "enable" +echo "" > kprobe_events +echo "" > trace + +: "Test argument %pD with name" +echo "p:testprobe vfs_read name=${ARG1}:%pD" > kprobe_events +echo 1 > events/kprobes/testprobe/enable +grep -q "1" events/kprobes/testprobe/enable +echo 0 > events/kprobes/testprobe/enable +grep "vfs_read" trace | grep -q "enable" +echo "" > kprobe_events +echo "" > trace + +: "Test argument %pD without name" +echo "p:testprobe vfs_read ${ARG1}:%pD" > kprobe_events +echo 1 > events/kprobes/testprobe/enable +grep -q "1" events/kprobes/testprobe/enable +echo 0 > events/kprobes/testprobe/enable +grep "vfs_read" trace | grep -q "enable" +echo "" > kprobe_events +echo "" > trace -- 2.31.1
Re: [PATCH] virtio_net: Support RX hash XDP hint
On Mon, Jan 22, 2024 at 7:10 PM Heng Qi wrote: > > Hi Liang Chen, > > 在 2024/1/22 下午6:22, Liang Chen 写道: > > The RSS hash report is a feature that's part of the virtio specification. > > Currently, virtio backends like qemu, vdpa (mlx5), and potentially vhost > > (still a work in progress as per [1]) support this feature. While the > > capability to obtain the RSS hash has been enabled in the normal path, > > it's currently missing in the XDP path. Therefore, we are introducing XDP > > hints through kfuncs to allow XDP programs to access the RSS hash. > > > > Signed-off-by: Liang Chen > > --- > > drivers/net/virtio_net.c | 56 > > 1 file changed, 56 insertions(+) > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > > index d7ce4a1011ea..1463a4709e3c 100644 > > --- a/drivers/net/virtio_net.c > > +++ b/drivers/net/virtio_net.c > > @@ -4579,6 +4579,60 @@ static void virtnet_set_big_packets(struct > > virtnet_info *vi, const int mtu) > > } > > } > > > > +static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, > > +enum xdp_rss_hash_type *rss_type) > > +{ > > + const struct xdp_buff *xdp = (void *)_ctx; > > + struct virtio_net_hdr_v1_hash *hdr_hash; > > + struct virtnet_info *vi; > > + > > + if (!(xdp->rxq->dev->features & NETIF_F_RXHASH)) > > I think 'vi->has_rss_hash_report' should be used here. > NETIF_F_RXHASH cannot guarantee that the hash report feature is negotiated, > and accessing hash_report and hash_value is unsafe at this time. > My understanding is that rxhash in dev->features is turned on only if the corresponding hw feature is set. We will double check on this. > > + return -ENODATA; > > + > > + vi = netdev_priv(xdp->rxq->dev); > > + hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len); > > If the virtio-net-hdr is overrided by the XDP prog, how can this be done > correctly and as expected? > Yeah, thanks for bringing up this concern. We are actively working on a fix of this issue(possibly with a wrapper structure of xdp_buff). Thanks, Liang > Thanks, > Heng > > > + > > + switch (__le16_to_cpu(hdr_hash->hash_report)) { > > + case VIRTIO_NET_HASH_REPORT_TCPv4: > > + *rss_type = XDP_RSS_TYPE_L4_IPV4_TCP; > > + break; > > + case VIRTIO_NET_HASH_REPORT_UDPv4: > > + *rss_type = XDP_RSS_TYPE_L4_IPV4_UDP; > > + break; > > + case VIRTIO_NET_HASH_REPORT_TCPv6: > > + *rss_type = XDP_RSS_TYPE_L4_IPV6_TCP; > > + break; > > + case VIRTIO_NET_HASH_REPORT_UDPv6: > > + *rss_type = XDP_RSS_TYPE_L4_IPV6_UDP; > > + break; > > + case VIRTIO_NET_HASH_REPORT_TCPv6_EX: > > + *rss_type = XDP_RSS_TYPE_L4_IPV6_TCP_EX; > > + break; > > + case VIRTIO_NET_HASH_REPORT_UDPv6_EX: > > + *rss_type = XDP_RSS_TYPE_L4_IPV6_UDP_EX; > > + break; > > + case VIRTIO_NET_HASH_REPORT_IPv4: > > + *rss_type = XDP_RSS_TYPE_L3_IPV4; > > + break; > > + case VIRTIO_NET_HASH_REPORT_IPv6: > > + *rss_type = XDP_RSS_TYPE_L3_IPV6; > > + break; > > + case VIRTIO_NET_HASH_REPORT_IPv6_EX: > > + *rss_type = XDP_RSS_TYPE_L3_IPV6_EX; > > + break; > > + case VIRTIO_NET_HASH_REPORT_NONE: > > + default: > > + *rss_type = XDP_RSS_TYPE_NONE; > > + } > > + > > + *hash = __le32_to_cpu(hdr_hash->hash_value); > > + return 0; > > +} > > + > > +static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = { > > + .xmo_rx_hash= virtnet_xdp_rx_hash, > > +}; > > + > > static int virtnet_probe(struct virtio_device *vdev) > > { > > int i, err = -ENOMEM; > > @@ -4613,6 +4667,8 @@ static int virtnet_probe(struct virtio_device *vdev) > > dev->ethtool_ops = &virtnet_ethtool_ops; > > SET_NETDEV_DEV(dev, &vdev->dev); > > > > + dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops; > > + > > /* Do we support "hardware" checksums? */ > > if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { > > /* This opens up the world of extra features. */ >
Re: [PATCH] virtio_net: Support RX hash XDP hint
On Mon, Jan 22, 2024 at 6:23 PM Liang Chen wrote: > > The RSS hash report is a feature that's part of the virtio specification. > Currently, virtio backends like qemu, vdpa (mlx5), and potentially vhost > (still a work in progress as per [1]) support this feature. While the > capability to obtain the RSS hash has been enabled in the normal path, > it's currently missing in the XDP path. Therefore, we are introducing XDP > hints through kfuncs to allow XDP programs to access the RSS hash. > > Signed-off-by: Liang Chen > --- > drivers/net/virtio_net.c | 56 > 1 file changed, 56 insertions(+) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index d7ce4a1011ea..1463a4709e3c 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -4579,6 +4579,60 @@ static void virtnet_set_big_packets(struct > virtnet_info *vi, const int mtu) > } > } > > +static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, > + enum xdp_rss_hash_type *rss_type) > +{ > + const struct xdp_buff *xdp = (void *)_ctx; > + struct virtio_net_hdr_v1_hash *hdr_hash; > + struct virtnet_info *vi; > + > + if (!(xdp->rxq->dev->features & NETIF_F_RXHASH)) > + return -ENODATA; > + > + vi = netdev_priv(xdp->rxq->dev); > + hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len); Is there a guarantee that the hdr is not modified? Thanks
Re: [PATCH] virtio_net: Support RX hash XDP hint
On Tue, Jan 23, 2024 at 3:02 PM Michael S. Tsirkin wrote: > > On Mon, Jan 22, 2024 at 06:22:56PM +0800, Liang Chen wrote: > > The RSS hash report is a feature that's part of the virtio specification. > > Currently, virtio backends like qemu, vdpa (mlx5), and potentially vhost > > (still a work in progress as per [1]) support this feature. While the > > capability to obtain the RSS hash has been enabled in the normal path, > > it's currently missing in the XDP path. Therefore, we are introducing XDP > > hints through kfuncs to allow XDP programs to access the RSS hash. > > > > Signed-off-by: Liang Chen > > --- > > drivers/net/virtio_net.c | 56 > > 1 file changed, 56 insertions(+) > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > > index d7ce4a1011ea..1463a4709e3c 100644 > > --- a/drivers/net/virtio_net.c > > +++ b/drivers/net/virtio_net.c > > @@ -4579,6 +4579,60 @@ static void virtnet_set_big_packets(struct > > virtnet_info *vi, const int mtu) > > } > > } > > > > +static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, > > +enum xdp_rss_hash_type *rss_type) > > +{ > > + const struct xdp_buff *xdp = (void *)_ctx; > > + struct virtio_net_hdr_v1_hash *hdr_hash; > > + struct virtnet_info *vi; > > + > > + if (!(xdp->rxq->dev->features & NETIF_F_RXHASH)) > > + return -ENODATA; > > + > > + vi = netdev_priv(xdp->rxq->dev); > > + hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len); > > + > > + switch (__le16_to_cpu(hdr_hash->hash_report)) { > > + case VIRTIO_NET_HASH_REPORT_TCPv4: > > + *rss_type = XDP_RSS_TYPE_L4_IPV4_TCP; > > + break; > > + case VIRTIO_NET_HASH_REPORT_UDPv4: > > + *rss_type = XDP_RSS_TYPE_L4_IPV4_UDP; > > + break; > > + case VIRTIO_NET_HASH_REPORT_TCPv6: > > + *rss_type = XDP_RSS_TYPE_L4_IPV6_TCP; > > + break; > > + case VIRTIO_NET_HASH_REPORT_UDPv6: > > + *rss_type = XDP_RSS_TYPE_L4_IPV6_UDP; > > + break; > > + case VIRTIO_NET_HASH_REPORT_TCPv6_EX: > > + *rss_type = XDP_RSS_TYPE_L4_IPV6_TCP_EX; > > + break; > > + case VIRTIO_NET_HASH_REPORT_UDPv6_EX: > > + *rss_type = XDP_RSS_TYPE_L4_IPV6_UDP_EX; > > + break; > > + case VIRTIO_NET_HASH_REPORT_IPv4: > > + *rss_type = XDP_RSS_TYPE_L3_IPV4; > > + break; > > + case VIRTIO_NET_HASH_REPORT_IPv6: > > + *rss_type = XDP_RSS_TYPE_L3_IPV6; > > + break; > > + case VIRTIO_NET_HASH_REPORT_IPv6_EX: > > + *rss_type = XDP_RSS_TYPE_L3_IPV6_EX; > > + break; > > + case VIRTIO_NET_HASH_REPORT_NONE: > > + default: > > + *rss_type = XDP_RSS_TYPE_NONE; > > + } > > + > > + *hash = __le32_to_cpu(hdr_hash->hash_value); > > + return 0; > > +} > > + > > +static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = { > > + .xmo_rx_hash= virtnet_xdp_rx_hash, > > +}; > > + > > static int virtnet_probe(struct virtio_device *vdev) > > { > > int i, err = -ENOMEM; > > @@ -4613,6 +4667,8 @@ static int virtnet_probe(struct virtio_device *vdev) > > dev->ethtool_ops = &virtnet_ethtool_ops; > > SET_NETDEV_DEV(dev, &vdev->dev); > > > > + dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops; > > + > > How about making this assignment depend on > > xdp->rxq->dev->features & NETIF_F_RXHASH > Thanks! How about dev->hw_features & NETIF_F_RXHASH? dev->features & NETIF_F_RXHASH has not been enabled at this point. > ? > > > /* Do we support "hardware" checksums? */ > > if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { > > /* This opens up the world of extra features. */ > > -- > > 2.40.1 >
Re: [PATCH -next v6 0/2] Make memory reclamation measurable
ping~ 在 2024/1/5 9:36, Bixuan Cui 写道: When the system memory is low, kswapd reclaims the memory. The key steps of memory reclamation include 1.shrink_lruvec * shrink_active_list, moves folios from the active LRU to the inactive LRU * shrink_inactive_list, shrink lru from inactive LRU list 2.shrink_slab * shrinker->count_objects(), calculates the freeable memory * shrinker->scan_objects(), reclaims the slab memory The existing tracers in the vmscan are as follows: --do_try_to_free_pages --shrink_zones --trace_mm_vmscan_node_reclaim_begin (tracer) --shrink_node --shrink_node_memcgs --trace_mm_vmscan_memcg_shrink_begin (tracer) --shrink_lruvec --shrink_list --shrink_active_list --trace_mm_vmscan_lru_shrink_active (tracer) --shrink_inactive_list --trace_mm_vmscan_lru_shrink_inactive (tracer) --shrink_active_list --shrink_slab --do_shrink_slab --shrinker->count_objects() --trace_mm_shrink_slab_start (tracer) --shrinker->scan_objects() --trace_mm_shrink_slab_end (tracer) --trace_mm_vmscan_memcg_shrink_end (tracer) --trace_mm_vmscan_node_reclaim_end (tracer) If we get the duration and quantity of shrink lru and slab, then we can measure the memory recycling, as follows Measuring memory reclamation with bpf: LRU FILE: CPU COMMShrinkActive(us) ShrinkInactive(us) Reclaim(page) 7 kswapd0 26 51 32 7 kswapd0 52 47 13 SLAB: CPU COMMOBJ_NAMECount_Dur(us) Freeable(page) Scan_Dur(us) Reclaim(page) 1 kswapd0 super_cache_scan.cfi_jt 2 341 3225 128 7 kswapd0 super_cache_scan.cfi_jt 0 2247 8524 1024 7 kswapd0 super_cache_scan.cfi_jt 23670 00 For this, add the new tracer to shrink_active_list/shrink_inactive_list and shrinker->count_objects(). Changes: v6: * Add Reviewed-by from Steven Rostedt. v5: * Use 'DECLARE_EVENT_CLASS(mm_vmscan_lru_shrink_start_template' to replace 'RACE_EVENT(mm_vmscan_lru_shrink_inactive/active_start' * Add the explanation for adding new shrink lru events into 'mm: vmscan: add new event to trace shrink lru' v4: Add Reviewed-by and Changlog to every patch. v3: Swap the positions of 'nid' and 'freeable' to prevent the hole in the trace event. v2: Modify trace_mm_vmscan_lru_shrink_inactive() in evict_folios() at the same time to fix build error. cuibixuan (2): mm: shrinker: add new event to trace shrink count mm: vmscan: add new event to trace shrink lru include/trace/events/vmscan.h | 80 ++- mm/shrinker.c | 4 ++ mm/vmscan.c | 11 +++-- 3 files changed, 90 insertions(+), 5 deletions(-)
Re: [PATCH v4 3/7] tracing/probes: support '%pd' type for print struct dentry's name
On 2024/1/23 22:40, Masami Hiramatsu (Google) wrote: On Tue, 23 Jan 2024 17:21:35 +0800 Ye Bin wrote: Similar to '%pd' for printk, use '%pd' for print struct dentry's name. Signed-off-by: Ye Bin --- kernel/trace/trace_kprobe.c | 6 ++ kernel/trace/trace_probe.h | 1 + 2 files changed, 7 insertions(+) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index c4c6e0e0068b..00b74530fbad 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -779,6 +779,7 @@ static int __trace_kprobe_create(int argc, const char *argv[]) char buf[MAX_EVENT_NAME_LEN]; char gbuf[MAX_EVENT_NAME_LEN]; char abuf[MAX_BTF_ARGS_LEN]; + char dbuf[MAX_DENTRY_ARGS_LEN]; Hmm, no, I don't like to expand stack anymore. Please allocate it from heap. Do I need to change the other buffers on the stacks to allocate memory from heap? struct traceprobe_parse_context ctx = { .flags = TPARG_FL_KERNEL }; switch (argv[0][0]) { @@ -930,6 +931,11 @@ static int __trace_kprobe_create(int argc, const char *argv[]) argv = new_argv; } + ret = traceprobe_expand_dentry_args(argc, argv, dbuf, + MAX_DENTRY_ARGS_LEN); + if (ret) + goto out; And calling this here will not cover the trace_fprobe. Could you call this from traceprobe_expand_meta_args() instead of calling it directly from trace_kprobe? Then it can be used from fprobe_event too. Thank you, At first I wanted to implement the extension logic in traceprobe_expand_meta_args(), but I found that the code was difficult to understand when I started writing. If fprobe_event wants to support this function, is traceprobe_expand_dentry_args() also called? Or re-encapsulate an interface to include the logic of different extensions. In this way, the same buffer is used for the entire extension process, and the extension function needs to return the information about the length of the buffer. + /* setup a probe */ tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive, argc, is_return); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 553371a4e0b1..d9c053824975 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -34,6 +34,7 @@ #define MAX_ARRAY_LEN 64 #define MAX_ARG_NAME_LEN 32 #define MAX_BTF_ARGS_LEN 128 +#define MAX_DENTRY_ARGS_LEN256 Why do you think I determined this value according to the extreme case that a parameter is expanded to occupy 64 bytes, and a maximum of four such large parameters are supported. #define MAX_STRING_SIZE PATH_MAX #define MAX_ARG_BUF_LEN (MAX_TRACE_ARGS * MAX_ARG_NAME_LEN) -- 2.31.1
Re: [PATCH v4 7/7] selftests/ftrace: add test cases for VFS type "%pd" and "%pD"
On 2024/1/24 9:32, Masami Hiramatsu (Google) wrote: On Tue, 23 Jan 2024 17:21:39 +0800 Ye Bin wrote: This patch adds test cases for new print format type "%pd/%pD".The test cases test the following items: 1. Test README if add "%pd/%pD" type; 2. Test "%pd" type for dput(); 3. Test "%pD" type for vfs_read(); Signed-off-by: Ye Bin --- .../ftrace/test.d/kprobe/kprobe_args_vfs.tc | 79 +++ 1 file changed, 79 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc new file mode 100644 index ..1d8edd294dd6 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc @@ -0,0 +1,79 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Kprobe event VFS type argument +# requires: kprobe_events + +case `uname -m` in +x86_64) + ARG1=%di +;; +i[3456]86) + ARG1=%ax +;; +aarch64) + ARG1=%x0 +;; +arm*) + ARG1=%r0 +;; +ppc64*) + ARG1=%r3 +;; +ppc*) + ARG1=%r3 You can merge this ppc* and ppc64* cases :) +;; +s390*) + ARG1=%r2 +;; +mips*) + ARG1=%r4 +;; +loongarch*) + ARG1=%r4 +;; +riscv*) + ARG1=%a0 Anyway, I wonder why don't you use '$arg1' instead of these registers. Is there any reason? Thank you, I looked at the parameter parsing code again, and using "$arg1" requires the kernel to enable the CONFIG_HAVE_FUNCTION_ARG_ACCESS_API configuration. +;; +*) + echo "Please implement other architecture here" + exit_untested +esac + +: "Test argument %pd/%pD in README" +grep -q "%pd/%pD" README + +: "Test argument %pd with name" +echo "p:testprobe dput name=${ARG1}:%pd" > kprobe_events +echo 1 > events/kprobes/testprobe/enable +grep -q "1" events/kprobes/testprobe/enable +echo 0 > events/kprobes/testprobe/enable +grep "dput" trace | grep -q "enable" +echo "" > kprobe_events +echo "" > trace + +: "Test argument %pd without name" +echo "p:testprobe dput ${ARG1}:%pd" > kprobe_events +echo 1 > events/kprobes/testprobe/enable +grep -q "1" events/kprobes/testprobe/enable +echo 0 > events/kprobes/testprobe/enable +grep "dput" trace | grep -q "enable" +echo "" > kprobe_events +echo "" > trace + +: "Test argument %pD with name" +echo "p:testprobe vfs_read name=${ARG1}:%pD" > kprobe_events +echo 1 > events/kprobes/testprobe/enable +grep -q "1" events/kprobes/testprobe/enable +echo 0 > events/kprobes/testprobe/enable +grep "vfs_read" trace | grep -q "enable" +echo "" > kprobe_events +echo "" > trace + +: "Test argument %pD without name" +echo "p:testprobe vfs_read ${ARG1}:%pD" > kprobe_events +echo 1 > events/kprobes/testprobe/enable +grep -q "1" events/kprobes/testprobe/enable +echo 0 > events/kprobes/testprobe/enable +grep "vfs_read" trace | grep -q "enable" +echo "" > kprobe_events +echo "" > trace -- 2.31.1
Re: [PATCH v7 04/15] x86/sgx: Implement basic EPC misc cgroup functionality
On Mon, 22 Jan 2024 14:25:53 -0600, Jarkko Sakkinen wrote: On Mon Jan 22, 2024 at 7:20 PM EET, Haitao Huang wrote: From: Kristen Carlson Accardi SGX Enclave Page Cache (EPC) memory allocations are separate from normal RAM allocations, and are managed solely by the SGX subsystem. The existing cgroup memory controller cannot be used to limit or account for SGX EPC memory, which is a desirable feature in some environments. For example, in a Kubernates environment, a user can request certain EPC quota for a pod but the orchestrator can not enforce the quota to limit runtime EPC usage of the pod without an EPC cgroup controller. Utilize the misc controller [admin-guide/cgroup-v2.rst, 5-9. Misc] to limit and track EPC allocations per cgroup. Earlier patches have added the "sgx_epc" resource type in the misc cgroup subsystem. Add basic support in SGX driver as the "sgx_epc" resource provider: - Set "capacity" of EPC by calling misc_cg_set_capacity() - Update EPC usage counter, "current", by calling charge and uncharge APIs for EPC allocation and deallocation, respectively. - Setup sgx_epc resource type specific callbacks, which perform initialization and cleanup during cgroup allocation and deallocation, respectively. With these changes, the misc cgroup controller enables user to set a hard limit for EPC usage in the "misc.max" interface file. It reports current usage in "misc.current", the total EPC memory available in "misc.capacity", and the number of times EPC usage reached the max limit in "misc.events". For now, the EPC cgroup simply blocks additional EPC allocation in sgx_alloc_epc_page() when the limit is reached. Reclaimable pages are still tracked in the global active list, only reclaimed by the global reclaimer when the total free page count is lower than a threshold. Later patches will reorganize the tracking and reclamation code in the global reclaimer and implement per-cgroup tracking and reclaiming. Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Signed-off-by: Kristen Carlson Accardi Co-developed-by: Haitao Huang Signed-off-by: Haitao Huang For consistency sake I'd also add co-developed-by for Kristen. This is at least the format suggested by kernel documentation. She is the "From Author", so only Signed-off-by is needed for her according to the second example in the doc[1]? Thanks Haitao [1]https://docs.kernel.org/process/submitting-patches.html#when-to-use-acked-by-cc-and-co-developed-by
Re: [PATCH] virtio_net: Support RX hash XDP hint
On Wed, 24 Jan 2024 10:04:51 +0800, Liang Chen wrote: > On Mon, Jan 22, 2024 at 7:10 PM Heng Qi wrote: > > > > Hi Liang Chen, > > > > 在 2024/1/22 下午6:22, Liang Chen 写道: > > > The RSS hash report is a feature that's part of the virtio specification. > > > Currently, virtio backends like qemu, vdpa (mlx5), and potentially vhost > > > (still a work in progress as per [1]) support this feature. While the > > > capability to obtain the RSS hash has been enabled in the normal path, > > > it's currently missing in the XDP path. Therefore, we are introducing XDP > > > hints through kfuncs to allow XDP programs to access the RSS hash. > > > > > > Signed-off-by: Liang Chen > > > --- > > > drivers/net/virtio_net.c | 56 > > > 1 file changed, 56 insertions(+) > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > > > index d7ce4a1011ea..1463a4709e3c 100644 > > > --- a/drivers/net/virtio_net.c > > > +++ b/drivers/net/virtio_net.c > > > @@ -4579,6 +4579,60 @@ static void virtnet_set_big_packets(struct > > > virtnet_info *vi, const int mtu) > > > } > > > } > > > > > > +static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, > > > +enum xdp_rss_hash_type *rss_type) > > > +{ > > > + const struct xdp_buff *xdp = (void *)_ctx; > > > + struct virtio_net_hdr_v1_hash *hdr_hash; > > > + struct virtnet_info *vi; > > > + > > > + if (!(xdp->rxq->dev->features & NETIF_F_RXHASH)) > > > > I think 'vi->has_rss_hash_report' should be used here. > > NETIF_F_RXHASH cannot guarantee that the hash report feature is negotiated, > > and accessing hash_report and hash_value is unsafe at this time. > > > > My understanding is that rxhash in dev->features is turned on only if > the corresponding hw feature is set. We will double check on this. > > > > + return -ENODATA; > > > + > > > + vi = netdev_priv(xdp->rxq->dev); > > > + hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - > > > vi->hdr_len); > > > > If the virtio-net-hdr is overrided by the XDP prog, how can this be done > > correctly and as expected? > > > > Yeah, thanks for bringing up this concern. We are actively working on > a fix of this issue(possibly with a wrapper structure of xdp_buff). Are there some places to save the hash before run xdp? Thanks. > > Thanks, > Liang > > > Thanks, > > Heng > > > > > + > > > + switch (__le16_to_cpu(hdr_hash->hash_report)) { > > > + case VIRTIO_NET_HASH_REPORT_TCPv4: > > > + *rss_type = XDP_RSS_TYPE_L4_IPV4_TCP; > > > + break; > > > + case VIRTIO_NET_HASH_REPORT_UDPv4: > > > + *rss_type = XDP_RSS_TYPE_L4_IPV4_UDP; > > > + break; > > > + case VIRTIO_NET_HASH_REPORT_TCPv6: > > > + *rss_type = XDP_RSS_TYPE_L4_IPV6_TCP; > > > + break; > > > + case VIRTIO_NET_HASH_REPORT_UDPv6: > > > + *rss_type = XDP_RSS_TYPE_L4_IPV6_UDP; > > > + break; > > > + case VIRTIO_NET_HASH_REPORT_TCPv6_EX: > > > + *rss_type = XDP_RSS_TYPE_L4_IPV6_TCP_EX; > > > + break; > > > + case VIRTIO_NET_HASH_REPORT_UDPv6_EX: > > > + *rss_type = XDP_RSS_TYPE_L4_IPV6_UDP_EX; > > > + break; > > > + case VIRTIO_NET_HASH_REPORT_IPv4: > > > + *rss_type = XDP_RSS_TYPE_L3_IPV4; > > > + break; > > > + case VIRTIO_NET_HASH_REPORT_IPv6: > > > + *rss_type = XDP_RSS_TYPE_L3_IPV6; > > > + break; > > > + case VIRTIO_NET_HASH_REPORT_IPv6_EX: > > > + *rss_type = XDP_RSS_TYPE_L3_IPV6_EX; > > > + break; > > > + case VIRTIO_NET_HASH_REPORT_NONE: > > > + default: > > > + *rss_type = XDP_RSS_TYPE_NONE; > > > + } > > > + > > > + *hash = __le32_to_cpu(hdr_hash->hash_value); > > > + return 0; > > > +} > > > + > > > +static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = { > > > + .xmo_rx_hash= virtnet_xdp_rx_hash, > > > +}; > > > + > > > static int virtnet_probe(struct virtio_device *vdev) > > > { > > > int i, err = -ENOMEM; > > > @@ -4613,6 +4667,8 @@ static int virtnet_probe(struct virtio_device *vdev) > > > dev->ethtool_ops = &virtnet_ethtool_ops; > > > SET_NETDEV_DEV(dev, &vdev->dev); > > > > > > + dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops; > > > + > > > /* Do we support "hardware" checksums? */ > > > if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { > > > /* This opens up the world of extra features. */ > > >
Re: [PATCH 1/3] dt-bindings: clock: gcc-msm8953: add reset for MDSS subsystem
On 23/01/2024 22:03, Luca Weiss wrote: > From: Vladimir Lypak > > Add a new define for the GCC_MDSS_BCR found on MSM8953. > > Signed-off-by: Vladimir Lypak > [luca: expand commit message] > Signed-off-by: Luca Weiss Acked-by: Krzysztof Kozlowski Best regards, Krzysztof