Re: [PATCH v5 2/3] printf: break kunit into test cases

2025-03-06 Thread Petr Mladek
On Fri 2025-02-21 15:34:31, Tamir Duberstein wrote:
> Move all tests into `printf_test_cases`. This gives us nicer output in
> the event of a failure.
> 
> Combine `plain_format` and `plain_hash` into `hash_pointer` since
> they're testing the same scenario.
> 
> Signed-off-by: Tamir Duberstein 
> ---
>  lib/tests/printf_kunit.c | 331 
> +--
>  1 file changed, 121 insertions(+), 210 deletions(-)
> 
> diff --git a/lib/tests/printf_kunit.c b/lib/tests/printf_kunit.c
> index 287bbfb61148..013df6f6dd49 100644
> --- a/lib/tests/printf_kunit.c
> +++ b/lib/tests/printf_kunit.c
> @@ -38,13 +38,8 @@ static unsigned int total_tests;
>  static char *test_buffer;
>  static char *alloced_buffer;
>  
> -static struct kunit *kunittest;
> -
> -#define tc_fail(fmt, ...) \
> - KUNIT_FAIL(kunittest, fmt, ##__VA_ARGS__)
> -
> -static void __printf(4, 0)
> -do_test(int bufsize, const char *expect, int elen,
> +static void __printf(5, 0)
> +do_test(struct kunit *kunittest, int bufsize, const char *expect, int elen,
>   const char *fmt, va_list ap)
>  {
>   va_list aq;
> @@ -58,59 +53,64 @@ do_test(int bufsize, const char *expect, int elen,
[...]
>  
>   if (memcmp(test_buffer, expect, written)) {
> - tc_fail("vsnprintf(buf, %d, \"%s\", ...) wrote '%s', expected 
> '%.*s'",
> - bufsize, fmt, test_buffer, written, expect);
> + KUNIT_FAIL(kunittest, "vsnprintf(buf, %d, \"%s\", ...) wrote 
> '%s', expected '%.*s'",
> +bufsize, fmt, test_buffer, written, expect);
>   return;
>   }
>  }
>  
> -static void __printf(3, 4)
> -__test(const char *expect, int elen, const char *fmt, ...)
> +static void __printf(4, 0)

This should be:

static void __printf(4, 5)

The 2nd parameter is zero when the variable list of parameters is
passed using va_list.

> +__test(struct kunit *kunittest, const char *expect, int elen, const char 
> *fmt, ...)
>  {
>   va_list ap;
>   int rand;
>   char *p;

> @@ -247,89 +225,44 @@ plain_format(void)
>  #define ZEROS ""
>  #define ONES ""
>  
> -static int
> -plain_format(void)
> -{
> - /* Format is implicitly tested for 32 bit machines by plain_hash() */
> - return 0;
> -}
> -
>  #endif   /* BITS_PER_LONG == 64 */
>  
> -static int
> -plain_hash_to_buffer(const void *p, char *buf, size_t len)
> +static void
> +plain_hash_to_buffer(struct kunit *kunittest, const void *p, char *buf, 
> size_t len)
>  {
> - int nchars;
> -
> - nchars = snprintf(buf, len, "%p", p);
> -
> - if (nchars != PTR_WIDTH)
> - return -1;
> + KUNIT_ASSERT_EQ(kunittest, snprintf(buf, len, "%p", p), PTR_WIDTH);
>  
>   if (strncmp(buf, PTR_VAL_NO_CRNG, PTR_WIDTH) == 0) {
>   kunit_warn(kunittest, "crng possibly not yet initialized. plain 
> 'p' buffer contains \"%s\"",
>   PTR_VAL_NO_CRNG);
> - return 0;
>   }
> -
> - return 0;
>  }
>  
> -static int
> -plain_hash(void)
> -{
> - char buf[PLAIN_BUF_SIZE];
> - int ret;
> -
> - ret = plain_hash_to_buffer(PTR, buf, PLAIN_BUF_SIZE);
> - if (ret)
> - return ret;
> -
> - if (strncmp(buf, PTR_STR, PTR_WIDTH) == 0)
> - return -1;
> -
> - return 0;
> -}
> -
> -/*
> - * We can't use test() to test %p because we don't know what output to expect
> - * after an address is hashed.
> - */
>  static void
> -plain(void)
> +hash_pointer(struct kunit *kunittest)
>  {
> - int err;
> + if (no_hash_pointers)
> + kunit_skip(kunittest, "hash pointers disabled");
>  
> - if (no_hash_pointers) {
> - kunit_warn(kunittest, "skipping plain 'p' tests");
> - return;
> - }
> + char buf[PLAIN_BUF_SIZE];
>  
> - err = plain_hash();
> - if (err) {
> - tc_fail("plain 'p' does not appear to be hashed");
> - return;
> - }
> + plain_hash_to_buffer(kunittest, PTR, buf, PLAIN_BUF_SIZE);
>  
> - err = plain_format();
> - if (err) {
> - tc_fail("hashing plain 'p' has unexpected format");
> - }
> + /*
> +  * We can't use test() to test %p because we don't know what output to 
> expect
> +  * after an address is hashed.
> +  */

The code no longer prints a reasonable error message on failure.
I would extend the comment to make it easier to understand the
meaning. Also I would use the imperative style. Something like:

/*
 * The hash of %p is unpredictable, therefore test() cannot be used.
 * Instead, verify that the first 32 bits are zeros on a 64-bit system,
 * and confirm the non-hashed value is not printed.
 */
> +
> + KUNIT_EXPECT_MEMEQ(kunittest, buf, ZEROS, strlen(ZEROS));
> + KUNIT_EXPECT_MEMNEQ(kunittest, buf+strlen(ZEROS), PTR_STR, PTR_WIDTH);

This looks wrong. It should be either:

KUNIT_EXPECT_MEMNEQ(kunittest, buf, PTR_STR, PTR_WIDTH);

or

[PATCH 05/13] MIPS: make setup_zero_pages() use memblock

2025-03-06 Thread Mike Rapoport
From: "Mike Rapoport (Microsoft)" 

Allocating the zero pages from memblock is simpler because the memory is
already reserved.

This will also help with pulling out memblock_free_all() to the generic
code and reducing code duplication in arch::mem_init().

Signed-off-by: Mike Rapoport (Microsoft) 
---
 arch/mips/include/asm/mmzone.h |  2 --
 arch/mips/mm/init.c| 16 +---
 2 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/arch/mips/include/asm/mmzone.h b/arch/mips/include/asm/mmzone.h
index 14226ea42036..602a21aee9d4 100644
--- a/arch/mips/include/asm/mmzone.h
+++ b/arch/mips/include/asm/mmzone.h
@@ -20,6 +20,4 @@
 #define nid_to_addrbase(nid) 0
 #endif
 
-extern void setup_zero_pages(void);
-
 #endif /* _ASM_MMZONE_H_ */
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 3db6082c611e..f51cd97376df 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -59,25 +59,19 @@ EXPORT_SYMBOL(zero_page_mask);
 /*
  * Not static inline because used by IP27 special magic initialization code
  */
-void setup_zero_pages(void)
+static void __init setup_zero_pages(void)
 {
-   unsigned int order, i;
-   struct page *page;
+   unsigned int order;
 
if (cpu_has_vce)
order = 3;
else
order = 0;
 
-   empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+   empty_zero_page = (unsigned long)memblock_alloc(PAGE_SIZE << order, 
PAGE_SIZE);
if (!empty_zero_page)
panic("Oh boy, that early out of memory?");
 
-   page = virt_to_page((void *)empty_zero_page);
-   split_page(page, order);
-   for (i = 0; i < (1 << order); i++, page++)
-   mark_page_reserved(page);
-
zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
 }
 
@@ -470,9 +464,9 @@ void __init mem_init(void)
BUILD_BUG_ON(IS_ENABLED(CONFIG_32BIT) && (PFN_PTE_SHIFT > PAGE_SHIFT));
 
maar_init();
-   memblock_free_all();
setup_zero_pages(); /* Setup zeroed pages.  */
mem_init_free_highmem();
+   memblock_free_all();
 
 #ifdef CONFIG_64BIT
if ((unsigned long) &_text > (unsigned long) CKSEG0)
@@ -486,8 +480,8 @@ void __init mem_init(void)
 void __init mem_init(void)
 {
high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT);
-   memblock_free_all();
setup_zero_pages(); /* This comes from node 0 */
+   memblock_free_all();
 }
 #endif /* !CONFIG_NUMA */
 
-- 
2.47.2




Re: [PATCH v3 net-next 06/13] net: enetc: add RSS support for i.MX95 ENETC PF

2025-03-06 Thread Jakub Kicinski
On Tue,  4 Mar 2025 15:21:54 +0800 Wei Fang wrote:
> Add Receive side scaling (RSS) support for i.MX95 ENETC PF to improve
> the network performance and balance the CPU loading. The main changes
> are as follows.
> 
> 1. Since i.MX95 ENETC (v4) use NTMP 2.0 to manage the RSS table, which
> is different from LS1028A ENETC (v1). In order to reuse some functions
> related to the RSS table, so add .get_rss_table() and .set_rss_table()
> hooks to enetc_si_ops.
> 
> 2. Since the offset of the RSS key registers of i.MX95 ENETC is also
> different from that of LS1028A, so add enetc_get_rss_key_base() to get
> the base offset for the different chips, so that enetc_set_rss_key()
> and enetc_get_rss_key() can be reused for this trivial.

Please split this patch into refactoring of existing code and new
additions.



Re: [PATCH] book3s64/radix : Align section vmemmap start address to PAGE_SIZE

2025-03-06 Thread Donet Tom



On 3/6/25 9:41 AM, Aneesh Kumar K.V wrote:

Donet Tom  writes:


On 3/3/25 18:32, Aneesh Kumar K.V wrote:

Donet Tom  writes:


A vmemmap altmap is a device-provided region used to provide
backing storage for struct pages. For each namespace, the altmap
should belong to that same namespace. If the namespaces are
created unaligned, there is a chance that the section vmemmap
start address could also be unaligned. If the section vmemmap
start address is unaligned, the altmap page allocated from the
current namespace might be used by the previous namespace also.
During the free operation, since the altmap is shared between two
namespaces, the previous namespace may detect that the page does
not belong to its altmap and incorrectly assume that the page is a
normal page. It then attempts to free the normal page, which leads
to a kernel crash.

In this patch, we are aligning the section vmemmap start address
to PAGE_SIZE. After alignment, the start address will not be
part of the current namespace, and a normal page will be allocated
for the vmemmap mapping of the current section. For the remaining
sections, altmaps will be allocated. During the free operation,
the normal page will be correctly freed.

Without this patch
==
NS1 start   NS2 start
   _
| NS1   |NS2  |
   -
| Altmap| Altmap | .|Altmap| Altmap | ...
|  NS1  |  NS1   |  | NS2  |  NS2   |


  ^^^ this should be allocated in ram?


Yes, it should be allocated from RAM. However, in the current
implementation, an altmap page gets allocated. This is because the
NS2 vmemmap section's start address is unaligned. There is an
altmap_cross_boundary() check. Here, from the vmemmap section
start, we identify the namespace start and check if the namespace start
is within the boundary. Since it is within the boundary, it returns false,
causing an altmap page to be allocated. During the PTE update, the
vmemmap start address is aligned down to PAGE_SIZE, and the PTE is
updated. As a result, the altmap page is shared between the current
and previous namespaces.

If we had aligned the vmemmap start address, the
altmap_cross_boundary() function would return true because the
vmemmap section's start address belongs to the previous
namespace. Therefore, a normal page gets allocated. During the
PTE set operation, since the address is already aligned, the
PTE will be updated.


So the nvdimm driver should ensure that alignment right? I assume other things
will also require that to be properly aligned.?


 #cat /proc/iomem
-63 : System RAM
4034000-403401f : namespace1.0
4034020-403a0ff : dax1.0
403a100-403a11f : namespace1.1
403a120-40401ff : dax1.1
4040200-404021f : namespace1.2
4040220-40462ff : dax1.2
4046300-404631f : namespace1.3
4046320-404c3ff : dax1.3
 #

I have created 4 namespaces with a size of 1552M. As you can see, the 
start of
namespace1.0 is 1G aligned, while namespace1.1, namespace1.2, and 
namespace1.3
are not 1G aligned. If I had created the namespace with a size of 1536M 
(1.5G), then

all the namespaces would have started 1G aligned.

I believe that based on the size we are requesting, the namespaces 
alignments are

being created. They do not always need to be 1G aligned.

Now, if we calculate the vmemmap start for namespace1.1..

Phy start - 0x403a100
pfn start - 0x403a100 / PAGE_SIZE = 0x403a100

vmemmap start = 0xc00c + (0x403a100 * 0x40)
  =0xC00C000100E84000

This address is not page aligned. This will trigger this issue.



-aneesh




Re: PowerPC: selftests/powerpc fails to compile linux-next

2025-03-06 Thread Madhavan Srinivasan



On 3/6/25 10:30 PM, Venkat Rao Bagalkote wrote:
> Greetings!!
> 
> I see selftests/powerpc fails to compile on next-20250306.
> 
> This error has been introduced in next-20250218. Make is successful on 
> next-20250217.
> 
> 
> Attached is the .config used.
> 
> 
> If you fix this, please add below tag.
> 
> Reported-by: Venkat Rao Bagalkote 
> 
> 
> Errors:
> 
> make -C powerpc/
> make: Entering directory 
> '/root/venkat/linux-next/tools/testing/selftests/powerpc'
> Makefile:60: warning: overriding recipe for target 'emit_tests'
> ../lib.mk:182: warning: ignoring old recipe for target 'emit_tests'
> BUILD_TARGET=/root/venkat/linux-next/tools/testing/selftests/powerpc/alignment;
>  mkdir -p $BUILD_TARGET; make OUTPUT=$BUILD_TARGET -k -C alignment all
> make[1]: Nothing to be done for 'all'.
> BUILD_TARGET=/root/venkat/linux-next/tools/testing/selftests/powerpc/benchmarks;
>  mkdir -p $BUILD_TARGET; make OUTPUT=$BUILD_TARGET -k -C benchmarks all
> make[1]: Nothing to be done for 'all'.
> BUILD_TARGET=/root/venkat/linux-next/tools/testing/selftests/powerpc/cache_shape;
>  mkdir -p $BUILD_TARGET; make OUTPUT=$BUILD_TARGET -k -C cache_shape all
> make[1]: Nothing to be done for 'all'.
> BUILD_TARGET=/root/venkat/linux-next/tools/testing/selftests/powerpc/copyloops;
>  mkdir -p $BUILD_TARGET; make OUTPUT=$BUILD_TARGET -k -C copyloops all
> make[1]: Nothing to be done for 'all'.
> BUILD_TARGET=/root/venkat/linux-next/tools/testing/selftests/powerpc/dexcr; 
> mkdir -p $BUILD_TARGET; make OUTPUT=$BUILD_TARGET -k -C dexcr all
> make[1]: Nothing to be done for 'all'.
> BUILD_TARGET=/root/venkat/linux-next/tools/testing/selftests/powerpc/dscr; 
> mkdir -p $BUILD_TARGET; make OUTPUT=$BUILD_TARGET -k -C dscr all
> make[1]: Nothing to be done for 'all'.
> BUILD_TARGET=/root/venkat/linux-next/tools/testing/selftests/powerpc/mm; 
> mkdir -p $BUILD_TARGET; make OUTPUT=$BUILD_TARGET -k -C mm all
>   CC   pkey_exec_prot
> In file included from pkey_exec_prot.c:18:
> /root/venkat/linux-next/tools/testing/selftests/powerpc/include/pkeys.h: In 
> function ‘pkeys_unsupported’:
> /root/venkat/linux-next/tools/testing/selftests/powerpc/include/pkeys.h:96:34:
>  error: ‘PKEY_UNRESTRICTED’ undeclared (first use in this function)
>    96 | pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
>   |  ^

Commit 6d61527d931ba ('mm/pkey: Add PKEY_UNRESTRICTED macro') added a macro,
PKEY_UNRESTRICTED, to replace the implicit literal value of 0x0 (which means
"unrestricted"). The patch below adds the same macro to the powerpc/mm
selftest.

Can you try this patch and check whether it fixes the build break for you?


diff --git a/tools/testing/selftests/powerpc/include/pkeys.h 
b/tools/testing/selftests/powerpc/include/pkeys.h
index c6d4063dd4f6..d6deb6ffa1b9 100644
--- a/tools/testing/selftests/powerpc/include/pkeys.h
+++ b/tools/testing/selftests/powerpc/include/pkeys.h
@@ -24,6 +24,9 @@
 #undef PKEY_DISABLE_EXECUTE
 #define PKEY_DISABLE_EXECUTE   0x4
 
+#undef PKEY_UNRESTRICTED
+#define PKEY_UNRESTRICTED  0x0
+
 /* Older versions of libc do not define this */
 #ifndef SEGV_PKUERR
 #define SEGV_PKUERR4


Maddy



> /root/venkat/linux-next/tools/testing/selftests/powerpc/include/pkeys.h:96:34:
>  note: each undeclared identifier is reported only once for each function it 
> appears in
> pkey_exec_prot.c: In function ‘segv_handler’:
> pkey_exec_prot.c:75:53: error: ‘PKEY_UNRESTRICTED’ undeclared (first use in 
> this function)
>    75 | pkey_set_rights(fault_pkey, 
> PKEY_UNRESTRICTED);
>   | ^
> make[1]: *** [../../lib.mk:222: 
> /root/venkat/linux-next/tools/testing/selftests/powerpc/mm/pkey_exec_prot] 
> Error 1
>   CC   pkey_siginfo
> In file included from pkey_siginfo.c:22:
> /root/venkat/linux-next/tools/testing/selftests/powerpc/include/pkeys.h: In 
> function ‘pkeys_unsupported’:
> /root/venkat/linux-next/tools/testing/selftests/powerpc/include/pkeys.h:96:34:
>  error: ‘PKEY_UNRESTRICTED’ undeclared (first use in this function)
>    96 | pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
>   |  ^
> /root/venkat/linux-next/tools/testing/selftests/powerpc/include/pkeys.h:96:34:
>  note: each undeclared identifier is reported only once for each function it 
> appears in
> pkey_siginfo.c: In function ‘segv_handler’:
> pkey_siginfo.c:86:39: error: ‘PKEY_UNRESTRICTED’ undeclared (first use in 
> this function)
>    86 | pkey_set_rights(pkey, PKEY_UNRESTRICTED);
>   |   ^
> make[1]: *** [../../lib.mk:222: 
> /root/venkat/linux-next/tools/testing/selftests/powerpc/mm/pkey_siginfo] 
> Error 1
> make[1]: Target 'all' not remade because of errors.
> make: *** [Makefile:40: mm] Error 2
> make: Leaving directory 
> '/root/venkat/linux-next/tools/testing/selftests/powerpc'
> 
> 
> Regards,
> 
> Venkat.




Re: [PATCH v4] powerpc/hugetlb: Disable gigantic hugepages if fadump is active

2025-03-06 Thread Sourabh Jain





On 06/03/25 00:47, Ritesh Harjani (IBM) wrote:

Sourabh Jain  writes:


Hello Ritesh,


On 04/03/25 10:27, Ritesh Harjani (IBM) wrote:

Sourabh Jain  writes:


Hello Ritesh,

Thanks for the review.

On 02/03/25 12:05, Ritesh Harjani (IBM) wrote:

Sourabh Jain  writes:


The fadump kernel boots with limited memory solely to collect the kernel
core dump. Having gigantic hugepages in the fadump kernel is of no use.

Sure got it.


Many times, the fadump kernel encounters OOM (Out of Memory) issues if
gigantic hugepages are allocated.

To address this, disable gigantic hugepages if fadump is active by
returning early from arch_hugetlb_valid_size() using
hugepages_supported(). When fadump is active, the global variable
hugetlb_disabled is set to true, which is later used by the
PowerPC-specific hugepages_supported() function to determine hugepage
support.

Returning early from arch_hugetlb_valid_size() not only disables
gigantic hugepages but also avoids unnecessary hstate initialization for
every hugepage size supported by the platform.

kernel logs related to hugepages with this patch included:
kernel argument passed: hugepagesz=1G hugepages=1

First kernel: gigantic hugepage got allocated
==

dmesg | grep -i "hugetlb"
-
HugeTLB: registered 1.00 GiB page size, pre-allocated 1 pages
HugeTLB: 0 KiB vmemmap can be freed for a 1.00 GiB page
HugeTLB: registered 2.00 MiB page size, pre-allocated 0 pages
HugeTLB: 0 KiB vmemmap can be freed for a 2.00 MiB page

$ cat /proc/meminfo | grep -i "hugetlb"
-
Hugetlb: 1048576 kB

Was this tested with patch [1] in your local tree?

[1]: 
https://web.git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/commit/?id=d629d7a8efc33

IIUC, this patch [1] disables the boot time allocation of hugepages.
Isn't it also disabling the boot time allocation for gigantic huge pages
passed by the cmdline params like hugepagesz=1G and hugepages=2 ?

Yes, I had the patch [1] in my tree.

My understanding is that gigantic pages are allocated before normal huge
pages.

In hugepages_setup() in hugetlb.c, we have:

       if (hugetlb_max_hstate && hstate_is_gigantic(parsed_hstate))
       hugetlb_hstate_alloc_pages(parsed_hstate);

I believe the above code allocates memory for gigantic pages, and
hugetlb_init() is
called later because it is a subsys_initcall.

So, by the time the kernel reaches hugetlb_init(), the gigantic pages
are already
allocated. Isn't that right?

Please let me know your opinion.

Yes, you are right. We are allocating hugepages from memblock, however
this isn't getting advertized anywhere. i.e. there is no way one can
know from any user interface on whether hugepages were allocated or not.
i.e. for fadump kernel when hugepagesz= and hugepages= params are
passed, though it will allocate gigantic pages, it won't advertize this
in meminfo or anywhere else. This was adding the confusion when I tested
this (which wasn't clear from the commit msg either).

And I guess this is happening during fadump kernel because of our patch
[1], which added a check to see whether hugetlb_disabled is true in
hugepages_supported(). Due to this hugetlb_init() is now not doing the
rest of the initialization for those gigantic pages which were allocated
due to cmdline options from hugepages_setup().

[1]: 
https://lore.kernel.org/linuxppc-dev/20241202054310.928610-1-sourabhj...@linux.ibm.com/

Now, as we know from below, fadump can set hugetlb_disabled in
early_setup().
i.e. fadump can mark hugetlb_disabled to true in
early_setup() -> early_init_devtree() -> fadump_reserve_mem()

And hugepages_setup() and hugepagesz_setup() gets called late in
start_kernel() -> parse_args()


And we already check for hugepages_supported() in all necessary calls in
mm/hugetlb.c. So IMO, this check should go in mm/hugetlb.c in
hugepagesz_setup() and hugepages_setup(). Because otherwise every arch
implementation will end up duplicating this by adding
hugepages_supported() check in their arch implementation of
arch_hugetlb_valid_size().

e.g. references of hugepages_supported() checks in mm/hugetlb.c

mm/hugetlb.c hugetlb_show_meminfo_node 4959 if (!hugepages_supported())
mm/hugetlb.c hugetlb_report_node_meminfo 4943 if (!hugepages_supported())
mm/hugetlb.c hugetlb_report_meminfo 4914 if (!hugepages_supported())
mm/hugetlb.c hugetlb_overcommit_handler 4848 if (!hugepages_supported())
mm/hugetlb.c hugetlb_sysctl_handler_common 4809 if (!hugepages_supported())
mm/hugetlb.c hugetlb_init 4461 if (!hugepages_supported()) {
mm/hugetlb.c dissolve_free_hugetlb_folios 2211 if (!hugepages_supported())
fs/hugetlbfs/inode.c init_hugetlbfs_fs 1604 if (!hugepages_supported()) {


Let me also see the history on why this wasn't done earlier though...

... Oh actually there is more history to this. See [2]. We already had
hugepages_supported() check in hugepages_setup() and other places
earlier which was 

Re: [PATCH] PCI/AER: Add kernel.aer_print_skip_mask to control aer log

2025-03-06 Thread Bijie Xu
On Tue, 4 Mar 2025 17:22:30 -0600, Bjorn Helgaas wrote:
> Can you take a look at this and see if it's going the right direction
> for you, or if it needs extensions to do what you need?
Thanks for your suggestion. I've taken some time to review the patch you
suggested.
It solves part of the problem. And it can set ratelimit on a single device, 
which
is good. 

But this patch solves the problem in a different way. 

1. Some users are very nervous to notice this kind of error logs. This patch can
give them an option to disable these logs entirely on the whole system level 
instead of just set a ratelimit on a specific device.

2. The sysctl configuration can be persisted after a system reboot. Users may 
dislike
these AER logs appearing again after a system reboot.

Regards,
Bijie Xu



Re: [PATCH v5 1/3] printf: convert self-test to KUnit

2025-03-06 Thread Tamir Duberstein
On Thu, Mar 6, 2025 at 9:25 AM Tamir Duberstein  wrote:
>
> On Thu, Mar 6, 2025 at 7:25 AM Petr Mladek  wrote:
> >
> > On Fri 2025-02-21 15:34:30, Tamir Duberstein wrote:
> > > Convert the printf() self-test to a KUnit test.
> > >
> > > [...]
> > >
>
> > 2. What was the motivation to remove the trailing '\n', please?
> >
> >It actually makes a difference from the printk() POV. Messages without
> >the trailing '\n' are _not_ flushed to the console until another
> >message is added. The reason is that they might still be appended
> >by pr_cont(). And printk() emits only complete lines to the
> >console.
> >
> >In general, messages should include the trailing '\n' unless the
> >code wants to append something later or the trailing '\n' is
> >added by another layer of the code. It does not seem to be this case.
> >
> >
> > >   bufsize, fmt, ret, elen);
> > > - return 1;
> > > + return;
> > >   }
> >
> > [...]
>
> I noticed in my testing that the trailing \n didn't change the test
> output, but I didn't know the details you shared about the trailing
> \n. I'll restore them, unless we jump straight to the KUNIT macros per
> the discussion above.

Ah, I forgot that `tc_fail` already delegates to KUNIT_FAIL. This was
the reason I removed the trailing newlines -- there is a mix of
present and absent trailing newlines in KUNIT_* macros, and it's not
clear to me what the correct thing is. For instance, the examples in
Documentation/dev-tools/kunit/{start,usage}.rst omit the trailing newlines.



Re: [PATCH v5 1/3] printf: convert self-test to KUnit

2025-03-06 Thread Petr Mladek
On Fri 2025-02-21 15:34:30, Tamir Duberstein wrote:
> Convert the printf() self-test to a KUnit test.
> 
> In the interest of keeping the patch reasonably-sized this doesn't
> refactor the tests into proper parameterized tests - it's all one big
> test case.
> 
> Signed-off-by: Tamir Duberstein 
> ---
>  Documentation/core-api/printk-formats.rst   |   4 +-
>  MAINTAINERS |   2 +-
>  lib/Kconfig.debug   |  12 +-
>  lib/Makefile|   1 -
>  lib/tests/Makefile  |   1 +
>  lib/{test_printf.c => tests/printf_kunit.c} | 188 
> +++-
>  tools/testing/selftests/lib/config  |   1 -
>  tools/testing/selftests/lib/printf.sh   |   4 -
>  8 files changed, 117 insertions(+), 96 deletions(-)
> 
> diff --git a/Documentation/core-api/printk-formats.rst 
> b/Documentation/core-api/printk-formats.rst
> index e0473da9..4bdc394e86af 100644
> --- a/Documentation/core-api/printk-formats.rst
> +++ b/Documentation/core-api/printk-formats.rst
> @@ -661,7 +661,7 @@ Do *not* use it from C.
>  Thanks
>  ==
>  
> -If you add other %p extensions, please extend  with
> -one or more test cases, if at all feasible.
> +If you add other %p extensions, please extend 
> +with one or more test cases, if at all feasible.
>  
>  Thank you for your cooperation and attention.
> diff --git a/MAINTAINERS b/MAINTAINERS
> index f076360ce3c6..b051ccf6b276 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -25510,8 +25510,8 @@ R:Sergey Senozhatsky 
>  S:   Maintained
>  T:   git git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux.git
>  F:   Documentation/core-api/printk-formats.rst
> -F:   lib/test_printf.c
>  F:   lib/test_scanf.c
> +F:   lib/tests/printf_kunit.c
>  F:   lib/vsprintf.c
>  
>  VT1211 HARDWARE MONITOR DRIVER
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 7ddbfdacf895..d2b15f633227 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -2436,6 +2436,15 @@ config ASYNC_RAID6_TEST
>  config TEST_HEXDUMP
>   tristate "Test functions located in the hexdump module at runtime"
>  
> +config PRINTF_KUNIT_TEST
> + tristate "KUnit test printf() family of functions at runtime" if 
> !KUNIT_ALL_TESTS
> + depends on KUNIT
> + default KUNIT_ALL_TESTS
> + help
> +   Enable this option to test the printf functions at runtime.
> +
> +   If unsure, say N.
> +
>  config STRING_KUNIT_TEST
>   tristate "KUnit test string functions at runtime" if !KUNIT_ALL_TESTS
>   depends on KUNIT
> @@ -2449,9 +2458,6 @@ config STRING_HELPERS_KUNIT_TEST
>  config TEST_KSTRTOX
>   tristate "Test kstrto*() family of functions at runtime"
>  
> -config TEST_PRINTF
> - tristate "Test printf() family of functions at runtime"
> -
>  config TEST_SCANF
>   tristate "Test scanf() family of functions at runtime"
>  
> diff --git a/lib/Makefile b/lib/Makefile
> index 961aef91d493..f31e6a3100ba 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -77,7 +77,6 @@ obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o
>  obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o
>  obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
>  obj-$(CONFIG_TEST_DYNAMIC_DEBUG) += test_dynamic_debug.o
> -obj-$(CONFIG_TEST_PRINTF) += test_printf.o
>  obj-$(CONFIG_TEST_SCANF) += test_scanf.o
>  
>  obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
> diff --git a/lib/tests/Makefile b/lib/tests/Makefile
> index 8961fbcff7a4..183c6a838a5d 100644
> --- a/lib/tests/Makefile
> +++ b/lib/tests/Makefile
> @@ -30,6 +30,7 @@ obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o
>  obj-$(CONFIG_MEMCPY_KUNIT_TEST) += memcpy_kunit.o
>  CFLAGS_overflow_kunit.o = $(call cc-disable-warning, 
> tautological-constant-out-of-range-compare)
>  obj-$(CONFIG_OVERFLOW_KUNIT_TEST) += overflow_kunit.o
> +obj-$(CONFIG_PRINTF_KUNIT_TEST) += printf_kunit.o
>  obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o
>  obj-$(CONFIG_SLUB_KUNIT_TEST) += slub_kunit.o
>  obj-$(CONFIG_TEST_SORT) += test_sort.o
> diff --git a/lib/test_printf.c b/lib/tests/printf_kunit.c
> similarity index 87%
> rename from lib/test_printf.c
> rename to lib/tests/printf_kunit.c
> index 59dbe4f9a4cb..287bbfb61148 100644
> --- a/lib/test_printf.c
> +++ b/lib/tests/printf_kunit.c
> @@ -3,9 +3,7 @@
>   * Test cases for printf facility.
>   */
>  
> -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> -
> -#include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -25,8 +23,6 @@
>  
>  #include 
>  
> -#include "../tools/testing/selftests/kselftest_module.h"
> -
>  #define BUF_SIZE 256
>  #define PAD_SIZE 16
>  #define FILL_CHAR '$'
> @@ -37,12 +33,17 @@
>   block \
>   __diag_pop();
>  
> -KSTM_MODULE_GLOBALS();
> +static unsigned int total_tests;
> +
> +static char *test_buffer;
> +static char *alloced_buffer;
> +
> +static struct kunit *kunittest;
>  
> -static char *test_buffer __initdata;
> -static char *alloced_buffer __i

RE: [EXTERNAL] Re: [PATCH v4 1/2] watchdog: Add a new flag WDIOF_STOP_MAYSLEEP

2025-03-06 Thread George Cherian
Hi Guenter,

I am summarizing the topics we discussed in multiple threads here.

>>On 3/5/25 03:01, Ahmad Fatoum wrote:
>> Hi George,
>> Hi Guenter,
>> 
>> On 05.03.25 11:34, George Cherian wrote:
 why is armada_37xx_wdt also here?
 The stop function in that driver may not sleep.
>>> Marek,
>>>
>>> Thanks for reviewing.
>>> Since the stop function has a regmap_write(), I thought it might sleep.
>>> Now that you pointed it out, I assume that it is an MMIO based regmap being 
>>> used for armada.
>>> I will update the same in the next version
>> 
>> Failure to add WDIOF_STOP_MAYSLEEP when it's needed can lead to
>> kernel hanging. Failure to add an alternative WDIOF_STOP_ATOMIC
>> would lead to the kernel option being a no-op.
>> 
>> I think a no-op stop_on_panic (or reset_on_panic) is a saner default.
>> 
>
>Agreed. Also, I like WDIOF_STOP_ATOMIC more than the WDIOF_STOP_NOSLEEP
>I had suggested in my other response.

1. Instead of blacklisting drivers as WDIOF_STOP_MAYSLEEP, the option will be an
opt-in.
2. This may not be WDIOF_STOP_ATOMIC; instead it would be a generic flag not
limited to the STOP
operation. Maybe WDIOF_OPS_ATOMIC (OPS include - .start, .stop,
.set_timeout, .ping)
3. Remove the kernel command line option (stop_on_panic) and have a generic 
reset_on_panic.
4. reset_on_panic=60 (by default )  meaning on a panic the wdog timeout is 
updated to 60sec
 or the clamp_t(reset_on_panic, min, max_hw_heartbeat_ms).
5. if reset_on_panic=0, it means the watchdog is stopped on panic.
6. All of these to be done by default from the panic handler in case of a configured
kdump kernel.

-George

>
>Thanks,
>Guenter



Re: [RFC kvm-unit-tests PATCH] lib: Use __ASSEMBLER__ instead of __ASSEMBLY__

2025-03-06 Thread Thomas Huth

On 22/02/2025 02.45, Sean Christopherson wrote:

Convert all non-x86 #ifdefs from __ASSEMBLY__ to __ASSEMBLER__, and remove
all manual __ASSEMBLY__ #defines.  __ASSEMBLY__ was inherited blindly from
the Linux kernel, and must be manually defined, e.g. through build rules
or with the aforementioned explicit #defines in assembly code.

__ASSEMBLER__ on the other hand is automatically defined by the compiler
when preprocessing assembly, i.e. doesn't require manual #defines for
the code to function correctly.

Ignore x86, as x86 doesn't actually rely on __ASSEMBLY__ at the moment,
and is undergoing a parallel cleanup.

Signed-off-by: Sean Christopherson 
---

Completely untested.  This is essentially a "rage" patch after spending
way, way too much time trying to understand why I couldn't include some
__ASSEMBLY__ protected headers in x86 assembly files.


Thanks, applied (after fixing the spot that Andrew mentioned and another one 
that has been merged in between)!


BTW, do you happen to know why the kernel uses __ASSEMBLY__ and not 
__ASSEMBLER__? Just grown historically, or is there a real reason?


 Thomas




PowerPC: Observing Kernel softlockup while running ftrace selftest

2025-03-06 Thread Venkat Rao Bagalkote

Greetings!!!


I am observing soft lockups while running the ftrace selftests on a
linux-next kernel.


Kernel Version: next-20250305

make run_tests
TAP version 13
1..2
# timeout set to 0
# selftests: ftrace: poll
# Error: Polling file is not specified
not ok 1 selftests: ftrace: poll # exit=255
# timeout set to 0
# selftests: ftrace: ftracetest-ktap
# TAP version 13
# 1..155
# ok 1 Basic trace file check
# ok 2 Basic test for tracers
# ok 3 Basic trace clock test
# ok 4 Basic event tracing check
# ok 5 Test tracefs GID mount option
# ok 6 Change the ringbuffer size
# ok 7 Change the ringbuffer sub-buffer size
# ok 8 Snapshot and tracing_cpumask
# ok 9 Snapshot and tracing setting
# ok 10 Test file and directory ownership changes for eventfs
# ok 11 Basic tests on writing to trace_marker
# ok 12 trace_pipe and trace_marker
# not ok 13 Test ftrace direct functions against tracers # UNRESOLVED
# not ok 14 Test ftrace direct functions against kprobes # UNRESOLVED
# ok 15 Generic dynamic event - add/remove probes with BTF arguments # SKIP
# ok 16 Generic dynamic event - add/remove eprobe events
# ok 17 Generic dynamic event - Repeating add/remove fprobe events # SKIP
# ok 18 Generic dynamic event - add/remove fprobe events # SKIP
# ok 19 Generic dynamic event - add/remove kprobe events
# ok 20 Generic dynamic event - add/remove synthetic events
# ok 21 Generic dynamic event - add/remove tracepoint probe events on 
module # SKIP

# ok 22 Generic dynamic event - add/remove tracepoint probe events # SKIP
# ok 23 Generic dynamic event - add/remove/test uprobe events
# ok 24 Generic dynamic event - selective clear (compatibility)
# ok 25 Checking dynamic events limitations
# ok 26 Event probe event parser error log check
# ok 27 Fprobe event VFS type argument # SKIP
# ok 28 Function return probe entry argument access # SKIP
# ok 29 Fprobe event parser error log check # SKIP
# ok 30 Generic dynamic event - generic clear event
# ok 31 Generic dynamic event - check if duplicate events are caught
# ok 32 Tracepoint probe event parser error log check # SKIP
# ok 33 event tracing - enable/disable with event level files
# not ok 34 event tracing - enable/disable with module event # UNRESOLVED
# ok 35 event tracing - restricts events based on pid notrace filtering
# ok 36 event tracing - restricts events based on pid
# ok 37 event tracing - enable/disable with subsystem level files
# ok 38 event tracing - enable/disable with top level files
# not ok 39 Test trace_printk from module # UNRESOLVED
# ok 40 event filter function - test event filtering on functions
# ok 41 ftrace - function graph filters with stack tracer
# ok 42 ftrace - function graph filters
# ok 43 ftrace - function graph filters
# ok 44 ftrace - function profiler with function graph tracing
# ok 45 ftrace - function graph print function return value # SKIP
# ok 46 ftrace - function trace with cpumask
# ok 47 ftrace - test for function event triggers
# ok 48 ftrace - function glob filters
# ok 49 ftrace - function pid notrace filters
# ok 50 ftrace - function pid filters
# ok 51 ftrace - stacktrace filter command


Warnings:


[ 2668.008907] watchdog: BUG: soft lockup - CPU#0 stuck for 2265s! 
[swapper/0:0]
[ 2668.008954] Modules linked in: bonding(E) nft_fib_inet(E) 
nft_fib_ipv4(E) nft_fib_ipv6(E) nft_fib(E) nft_reject_inet(E) 
nf_reject_ipv4(E) nf_reject_ipv6(E) nft_reject(E) nft_ct(E) 
nft_chain_nat(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) 
nf_defrag_ipv4(E) ip_set(E) rfkill(E) nf_tables(E) nfnetlink(E) 
mlx5_ib(E) ib_uverbs(E) ib_core(E) pseries_rng(E) vmx_crypto(E) 
dax_pmem(E) drm(E) drm_panel_orientation_quirks(E) xfs(E) sr_mod(E) 
cdrom(E) sd_mod(E) sg(E) lpfc(E) nd_pmem(E) nvmet_fc(E) nd_btt(E) 
ibmvscsi(E) scsi_transport_srp(E) ibmveth(E) nvmet(E) nvme_fc(E) 
mlx5_core(E) nvme_fabrics(E) papr_scm(E) mlxfw(E) nvme_core(E) 
libnvdimm(E) tls(E) psample(E) scsi_transport_fc(E) fuse(E)
[ 2668.010198] CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Kdump: loaded 
Tainted: G    W   EL 6.14.0-rc5-next-20250305 #1

[ 2668.010242] Tainted: [W]=WARN, [E]=UNSIGNED_MODULE, [L]=SOFTLOCKUP
[ 2668.010276] Hardware name: IBM,8375-42A POWER9 (architected) 0x4e0202 
0xf05 of:IBM,FW950.80 (VL950_131) hv:phyp pSeries
[ 2668.010316] NIP:  c0039f90 LR: c007c1a4 CTR: 
c0039f44
[ 2668.010354] REGS: c2c9f538 TRAP: 0900   Tainted: G    W   
EL  (6.14.0-rc5-next-20250305)
[ 2668.010392] MSR:  80009033   CR: 
22002844  XER: 2004

[ 2668.010639] CFAR:  IRQMASK: 0
[ 2668.010639] GPR00: c007c1a4 c2c9f510 c1688100 
c2c9f508
[ 2668.010639] GPR04: 82823033 c2c61700 c2c61700 
0178
[ 2668.010639] GPR08: 0002 0049  
2000
[ 2668.010639] GPR12: c0328588 c302  

[ 2668.010639] GPR16:    

[PATCH 13/13] arch, mm: make releasing of memory to page allocator more explicit

2025-03-06 Thread Mike Rapoport
From: "Mike Rapoport (Microsoft)" 

The point where the memory is released from memblock to the buddy allocator
is hidden inside arch-specific mem_init()s and the call to
memblock_free_all() is needlessly duplicated in every architecture. Also,
after the introduction of the arch_mm_preinit() hook, the mem_init()
implementation on many architectures only contains the call to
memblock_free_all().

Pull memblock_free_all() call into mm_core_init() and drop mem_init() on
relevant architectures to make it more explicit where the free memory is
released from memblock to the buddy allocator and to reduce code
duplication in architecture specific code.

Signed-off-by: Mike Rapoport (Microsoft) 
---
 arch/alpha/mm/init.c |  6 --
 arch/arc/mm/init.c   | 11 ---
 arch/arm/mm/init.c   | 11 ---
 arch/arm64/mm/init.c | 11 ---
 arch/csky/mm/init.c  |  5 -
 arch/hexagon/mm/init.c   | 18 --
 arch/loongarch/kernel/numa.c |  5 -
 arch/loongarch/mm/init.c |  5 -
 arch/m68k/mm/init.c  |  2 --
 arch/microblaze/mm/init.c|  3 ---
 arch/mips/mm/init.c  |  5 -
 arch/nios2/mm/init.c |  6 --
 arch/openrisc/mm/init.c  |  3 ---
 arch/parisc/mm/init.c|  2 --
 arch/powerpc/mm/mem.c|  5 -
 arch/riscv/mm/init.c |  5 -
 arch/s390/mm/init.c  |  6 --
 arch/sh/mm/init.c|  2 --
 arch/sparc/mm/init_32.c  |  5 -
 arch/sparc/mm/init_64.c  |  2 --
 arch/um/kernel/mem.c |  2 --
 arch/x86/mm/init_32.c|  3 ---
 arch/x86/mm/init_64.c|  2 --
 arch/xtensa/mm/init.c|  9 -
 include/linux/memblock.h |  1 -
 mm/internal.h|  3 ++-
 mm/mm_init.c |  5 +
 27 files changed, 7 insertions(+), 136 deletions(-)

diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index 3ab2d2f3c917..2d491b8cdab9 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -273,12 +273,6 @@ srm_paging_stop (void)
 }
 #endif
 
-void __init
-mem_init(void)
-{
-   memblock_free_all();
-}
-
 static const pgprot_t protection_map[16] = {
[VM_NONE]   = _PAGE_P(_PAGE_FOE | 
_PAGE_FOW |
  _PAGE_FOR),
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 90715b4a0bfa..a73cc94f806e 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -169,17 +169,6 @@ void __init arch_mm_preinit(void)
BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE);
 }
 
-/*
- * mem_init - initializes memory
- *
- * Frees up bootmem
- * Calculates and displays memory available/used
- */
-void __init mem_init(void)
-{
-   memblock_free_all();
-}
-
 #ifdef CONFIG_HIGHMEM
 int pfn_valid(unsigned long pfn)
 {
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 7222100b0631..54bdca025c9f 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -263,17 +263,6 @@ void __init arch_mm_preinit(void)
 #endif
 }
 
-/*
- * mem_init() marks the free areas in the mem_map and tells us how much
- * memory is free.  This is done after various parts of the system have
- * claimed their memory after the kernel image.
- */
-void __init mem_init(void)
-{
-   /* this will put all unused low memory onto the freelists */
-   memblock_free_all();
-}
-
 #ifdef CONFIG_STRICT_KERNEL_RWX
 struct section_perm {
const char *name;
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 8eff6a6eb11e..510695107233 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -411,17 +411,6 @@ void __init arch_mm_preinit(void)
}
 }
 
-/*
- * mem_init() marks the free areas in the mem_map and tells us how much memory
- * is free.  This is done after various parts of the system have claimed their
- * memory after the kernel image.
- */
-void __init mem_init(void)
-{
-   /* this will put all unused low memory onto the freelists */
-   memblock_free_all();
-}
-
 void free_initmem(void)
 {
void *lm_init_begin = lm_alias(__init_begin);
diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c
index 3914c2b873da..573da66b2543 100644
--- a/arch/csky/mm/init.c
+++ b/arch/csky/mm/init.c
@@ -42,11 +42,6 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned 
long)]
__page_aligned_bss;
 EXPORT_SYMBOL(empty_zero_page);
 
-void __init mem_init(void)
-{
-   memblock_free_all();
-}
-
 void free_initmem(void)
 {
free_initmem_default(-1);
diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c
index d412c2314509..34eb9d424b96 100644
--- a/arch/hexagon/mm/init.c
+++ b/arch/hexagon/mm/init.c
@@ -43,24 +43,6 @@ DEFINE_SPINLOCK(kmap_gen_lock);
 /*  checkpatch says don't init this to 0.  */
 unsigned long long kmap_generation;
 
-/*
- * mem_init - initializes memory
- *
- * Frees up bootmem
- * Fixes up more stuff for HIGHMEM
- * Calculates a

[PATCH 02/13] csky: move setup_initrd() to setup.c

2025-03-06 Thread Mike Rapoport
From: "Mike Rapoport (Microsoft)" 

Memory used by initrd should be reserved as soon as possible, before
there are any memblock allocations that might overwrite that memory.

This will also help with pulling out memblock_free_all() to the generic
code and reducing code duplication in arch::mem_init().

Signed-off-by: Mike Rapoport (Microsoft) 
---
 arch/csky/kernel/setup.c | 43 
 arch/csky/mm/init.c  | 43 
 2 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c
index fe715b707fd0..e0d6ca86ea8c 100644
--- a/arch/csky/kernel/setup.c
+++ b/arch/csky/kernel/setup.c
@@ -12,6 +12,45 @@
 #include 
 #include 
 
+#ifdef CONFIG_BLK_DEV_INITRD
+static void __init setup_initrd(void)
+{
+   unsigned long size;
+
+   if (initrd_start >= initrd_end) {
+   pr_err("initrd not found or empty");
+   goto disable;
+   }
+
+   if (__pa(initrd_end) > PFN_PHYS(max_low_pfn)) {
+   pr_err("initrd extends beyond end of memory");
+   goto disable;
+   }
+
+   size = initrd_end - initrd_start;
+
+   if (memblock_is_region_reserved(__pa(initrd_start), size)) {
+   pr_err("INITRD: 0x%08lx+0x%08lx overlaps in-use memory region",
+  __pa(initrd_start), size);
+   goto disable;
+   }
+
+   memblock_reserve(__pa(initrd_start), size);
+
+   pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n",
+   (void *)(initrd_start), size);
+
+   initrd_below_start_ok = 1;
+
+   return;
+
+disable:
+   initrd_start = initrd_end = 0;
+
+   pr_err(" - disabling initrd\n");
+}
+#endif
+
 static void __init csky_memblock_init(void)
 {
unsigned long lowmem_size = PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET);
@@ -40,6 +79,10 @@ static void __init csky_memblock_init(void)
max_low_pfn = min_low_pfn + sseg_size;
}
 
+#ifdef CONFIG_BLK_DEV_INITRD
+   setup_initrd();
+#endif
+
max_zone_pfn[ZONE_NORMAL] = max_low_pfn;
 
mmu_init(min_low_pfn, max_low_pfn);
diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c
index bde7cabd23df..ab51acbc19b2 100644
--- a/arch/csky/mm/init.c
+++ b/arch/csky/mm/init.c
@@ -42,45 +42,6 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned 
long)]
__page_aligned_bss;
 EXPORT_SYMBOL(empty_zero_page);
 
-#ifdef CONFIG_BLK_DEV_INITRD
-static void __init setup_initrd(void)
-{
-   unsigned long size;
-
-   if (initrd_start >= initrd_end) {
-   pr_err("initrd not found or empty");
-   goto disable;
-   }
-
-   if (__pa(initrd_end) > PFN_PHYS(max_low_pfn)) {
-   pr_err("initrd extends beyond end of memory");
-   goto disable;
-   }
-
-   size = initrd_end - initrd_start;
-
-   if (memblock_is_region_reserved(__pa(initrd_start), size)) {
-   pr_err("INITRD: 0x%08lx+0x%08lx overlaps in-use memory region",
-  __pa(initrd_start), size);
-   goto disable;
-   }
-
-   memblock_reserve(__pa(initrd_start), size);
-
-   pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n",
-   (void *)(initrd_start), size);
-
-   initrd_below_start_ok = 1;
-
-   return;
-
-disable:
-   initrd_start = initrd_end = 0;
-
-   pr_err(" - disabling initrd\n");
-}
-#endif
-
 void __init mem_init(void)
 {
 #ifdef CONFIG_HIGHMEM
@@ -92,10 +53,6 @@ void __init mem_init(void)
 #endif
high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
 
-#ifdef CONFIG_BLK_DEV_INITRD
-   setup_initrd();
-#endif
-
memblock_free_all();
 
 #ifdef CONFIG_HIGHMEM
-- 
2.47.2




Re: [PATCH] book3s64/radix : Align section vmemmap start address to PAGE_SIZE

2025-03-06 Thread Aneesh Kumar K . V
Donet Tom  writes:

> On 3/3/25 18:32, Aneesh Kumar K.V wrote:
>> Donet Tom  writes:
>>
>>> A vmemmap altmap is a device-provided region used to provide
>>> backing storage for struct pages. For each namespace, the altmap
>>> should belong to that same namespace. If the namespaces are
>>> created unaligned, there is a chance that the section vmemmap
>>> start address could also be unaligned. If the section vmemmap
>>> start address is unaligned, the altmap page allocated from the
>>> current namespace might be used by the previous namespace also.
>>> During the free operation, since the altmap is shared between two
>>> namespaces, the previous namespace may detect that the page does
>>> not belong to its altmap and incorrectly assume that the page is a
>>> normal page. It then attempts to free the normal page, which leads
>>> to a kernel crash.
>>>
>>> In this patch, we are aligning the section vmemmap start address
>>> to PAGE_SIZE. After alignment, the start address will not be
>>> part of the current namespace, and a normal page will be allocated
>>> for the vmemmap mapping of the current section. For the remaining
>>> sections, altmaps will be allocated. During the free operation,
>>> the normal page will be correctly freed.
>>>
>>> Without this patch
>>> ==
>>> NS1 start   NS2 start
>>>   _
>>> | NS1   |NS2  |
>>>   -
>>> | Altmap| Altmap | .|Altmap| Altmap | ...
>>> |  NS1  |  NS1   |  | NS2  |  NS2   |
>>>
>>  ^^^ this should be allocated in ram?
>>
>
> Yes, it should be allocated from RAM. However, in the current
> implementation, an altmap page gets allocated. This is because the
> NS2 vmemmap section's start address is unaligned. There is an
> altmap_cross_boundary() check. Here, from the vmemmap section
> start, we identify the namespace start and check if the namespace start
> is within the boundary. Since it is within the boundary, it returns false,
> causing an altmap page to be allocated. During the PTE update, the
> vmemmap start address is aligned down to PAGE_SIZE, and the PTE is
> updated. As a result, the altmap page is shared between the current
> and previous namespaces.
>
> If we had aligned the vmemmap start address, the
> altmap_cross_boundary() function would return true because the
> vmemmap section's start address belongs to the previous
> namespace. Therefore normal page gets allocated. During the
> PTE set operation, since the address is already aligned, the
> PTE will be updated.
>

So the nvdimm driver should ensure that alignment, right? I assume other things
will also require that to be properly aligned?

-aneesh



Re: [RFC kvm-unit-tests PATCH] lib: Use __ASSEMBLER__ instead of __ASSEMBLY__

2025-03-06 Thread Sean Christopherson
On Thu, Mar 06, 2025, Thomas Huth wrote:
> On 22/02/2025 02.45, Sean Christopherson wrote:
> > Convert all non-x86 #ifdefs from __ASSEMBLY__ to __ASSEMBLER__, and remove
> > all manual __ASSEMBLY__ #defines.  __ASSEMBLY__ was inherited blindly from
> > the Linux kernel, and must be manually defined, e.g. through build rules
> > or with the aforementioned explicit #defines in assembly code.
> > 
> > __ASSEMBLER__ on the other hand is automatically defined by the compiler
> > when preprocessing assembly, i.e. doesn't require manual #defines for
> > the code to function correctly.
> > 
> > Ignore x86, as x86 doesn't actually rely on __ASSEMBLY__ at the moment,
> > and is undergoing a parallel cleanup.
> > 
> > Signed-off-by: Sean Christopherson 
> > ---
> > 
> > Completely untested.  This is essentially a "rage" patch after spending
> > way, way too much time trying to understand why I couldn't include some
> > __ASSEMBLY__ protected headers in x86 assembly files.
> 
> Thanks, applied (after fixing the spot that Andrew mentioned and another one
> that has been merged in between)!
> 
> BTW, do you happen to know why the kernel uses __ASSEMBLY__ and not
> __ASSEMBLER__? Just grown historically, or is there a real reason?

AFAICT, it's purely historical.



Re: [PATCH v3 net-next 01/13] net: enetc: add initial netc-lib driver to support NTMP

2025-03-06 Thread Jakub Kicinski
On Tue,  4 Mar 2025 15:21:49 +0800 Wei Fang wrote:
> +config NXP_NETC_LIB
> + tristate "NETC Library"

Remove the string after "tristate", the user should not be prompted
to make a choice for this, since the consumers "select" this config
directly.

> + help
> +   This module provides common functionalities for both ENETC and NETC
> +   Switch, such as NETC Table Management Protocol (NTMP) 2.0, common tc
> +   flower and debugfs interfaces and so on.
> +
> +   If compiled as module (M), the module name is nxp-netc-lib.

Not sure if the help makes sense for an invisible symbol either.

>  config FSL_ENETC
>   tristate "ENETC PF driver"
>   depends on PCI_MSI
> @@ -40,6 +50,7 @@ config NXP_ENETC4
>   select FSL_ENETC_CORE
>   select FSL_ENETC_MDIO
>   select NXP_ENETC_PF_COMMON
> + select NXP_NETC_LIB
>   select PHYLINK
>   select DIMLIB
>   help

> +#pragma pack(1)

please don't blindly pack all structs, only if they are misaligned 
or will otherwise have holes.

> +#if IS_ENABLED(CONFIG_NXP_NETC_LIB)

why the ifdef, all callers select the config option




[PATCH 08/13] xtensa: split out printing of virtual memory layout to a function

2025-03-06 Thread Mike Rapoport
From: "Mike Rapoport (Microsoft)" 

This will help with pulling out memblock_free_all() to the generic
code and reducing code duplication in arch::mem_init().

Signed-off-by: Mike Rapoport (Microsoft) 
---
 arch/xtensa/mm/init.c | 97 ++-
 1 file changed, 50 insertions(+), 47 deletions(-)

diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index b2587a1a7c46..01577d33e602 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -66,6 +66,55 @@ void __init bootmem_init(void)
memblock_dump_all();
 }
 
+static void __init print_vm_layout(void)
+{
+   pr_info("virtual kernel memory layout:\n"
+#ifdef CONFIG_KASAN
+   "kasan   : 0x%08lx - 0x%08lx  (%5lu MB)\n"
+#endif
+#ifdef CONFIG_MMU
+   "vmalloc : 0x%08lx - 0x%08lx  (%5lu MB)\n"
+#endif
+#ifdef CONFIG_HIGHMEM
+   "pkmap   : 0x%08lx - 0x%08lx  (%5lu kB)\n"
+   "fixmap  : 0x%08lx - 0x%08lx  (%5lu kB)\n"
+#endif
+   "lowmem  : 0x%08lx - 0x%08lx  (%5lu MB)\n"
+   ".text   : 0x%08lx - 0x%08lx  (%5lu kB)\n"
+   ".rodata : 0x%08lx - 0x%08lx  (%5lu kB)\n"
+   ".data   : 0x%08lx - 0x%08lx  (%5lu kB)\n"
+   ".init   : 0x%08lx - 0x%08lx  (%5lu kB)\n"
+   ".bss: 0x%08lx - 0x%08lx  (%5lu kB)\n",
+#ifdef CONFIG_KASAN
+   KASAN_SHADOW_START, KASAN_SHADOW_START + KASAN_SHADOW_SIZE,
+   KASAN_SHADOW_SIZE >> 20,
+#endif
+#ifdef CONFIG_MMU
+   VMALLOC_START, VMALLOC_END,
+   (VMALLOC_END - VMALLOC_START) >> 20,
+#ifdef CONFIG_HIGHMEM
+   PKMAP_BASE, PKMAP_BASE + LAST_PKMAP * PAGE_SIZE,
+   (LAST_PKMAP*PAGE_SIZE) >> 10,
+   FIXADDR_START, FIXADDR_END,
+   (FIXADDR_END - FIXADDR_START) >> 10,
+#endif
+   PAGE_OFFSET, PAGE_OFFSET +
+   (max_low_pfn - min_low_pfn) * PAGE_SIZE,
+#else
+   min_low_pfn * PAGE_SIZE, max_low_pfn * PAGE_SIZE,
+#endif
+   ((max_low_pfn - min_low_pfn) * PAGE_SIZE) >> 20,
+   (unsigned long)_text, (unsigned long)_etext,
+   (unsigned long)(_etext - _text) >> 10,
+   (unsigned long)__start_rodata, (unsigned long)__end_rodata,
+   (unsigned long)(__end_rodata - __start_rodata) >> 10,
+   (unsigned long)_sdata, (unsigned long)_edata,
+   (unsigned long)(_edata - _sdata) >> 10,
+   (unsigned long)__init_begin, (unsigned long)__init_end,
+   (unsigned long)(__init_end - __init_begin) >> 10,
+   (unsigned long)__bss_start, (unsigned long)__bss_stop,
+   (unsigned long)(__bss_stop - __bss_start) >> 10);
+}
 
 void __init zones_init(void)
 {
@@ -77,6 +126,7 @@ void __init zones_init(void)
 #endif
};
free_area_init(max_zone_pfn);
+   print_vm_layout();
 }
 
 static void __init free_highpages(void)
@@ -118,53 +168,6 @@ void __init mem_init(void)
high_memory = (void *)__va(max_low_pfn << PAGE_SHIFT);
 
memblock_free_all();
-
-   pr_info("virtual kernel memory layout:\n"
-#ifdef CONFIG_KASAN
-   "kasan   : 0x%08lx - 0x%08lx  (%5lu MB)\n"
-#endif
-#ifdef CONFIG_MMU
-   "vmalloc : 0x%08lx - 0x%08lx  (%5lu MB)\n"
-#endif
-#ifdef CONFIG_HIGHMEM
-   "pkmap   : 0x%08lx - 0x%08lx  (%5lu kB)\n"
-   "fixmap  : 0x%08lx - 0x%08lx  (%5lu kB)\n"
-#endif
-   "lowmem  : 0x%08lx - 0x%08lx  (%5lu MB)\n"
-   ".text   : 0x%08lx - 0x%08lx  (%5lu kB)\n"
-   ".rodata : 0x%08lx - 0x%08lx  (%5lu kB)\n"
-   ".data   : 0x%08lx - 0x%08lx  (%5lu kB)\n"
-   ".init   : 0x%08lx - 0x%08lx  (%5lu kB)\n"
-   ".bss: 0x%08lx - 0x%08lx  (%5lu kB)\n",
-#ifdef CONFIG_KASAN
-   KASAN_SHADOW_START, KASAN_SHADOW_START + KASAN_SHADOW_SIZE,
-   KASAN_SHADOW_SIZE >> 20,
-#endif
-#ifdef CONFIG_MMU
-   VMALLOC_START, VMALLOC_END,
-   (VMALLOC_END - VMALLOC_START) >> 20,
-#ifdef CONFIG_HIGHMEM
-   PKMAP_BASE, PKMAP_BASE + LAST_PKMAP * PAGE_SIZE,
-   (LAST_PKMAP*PAGE_SIZE) >> 10,
-   FIXADDR_START, FIXADDR_END,
-   (FIXADDR_END - FIXADDR_START) >> 10,
-#endif
-   PAGE_OFFSET, PAGE_OFFSET +
-   (max_low_pfn - min_low_pfn) * PAGE_SIZE,
-#else
-   min_low_pfn * PAGE_SIZE, max_low_pfn * PAGE_SIZE,
-#endif
-   ((max_low_pfn - min_low_pfn) * PAGE_SIZE) >> 20,
-   (unsigned long)_text, (unsigned long)_etext,
-   (unsigned long)(_etext - _text) >> 10,
-   (unsigned long)__start_rodata, (unsigned long)__end_rodata,
-   (unsigned long)(__end_rodata - __start_rodata) >> 10,
-   (unsigned long)_sdata, (unsigned long)_edata,
-   

Re: [PATCH v3 net-next 07/13] net: enetc: check if the RSS hfunc is toeplitz

2025-03-06 Thread Jakub Kicinski
On Tue,  4 Mar 2025 15:21:55 +0800 Wei Fang wrote:
> Both ENETC v1 and ENETC v4 only support the toeplitz algorithm for RSS,
> so add a check for RSS hfunc.

Rejecting unsupported configurations is considered a fix,
please send it to net with a Fixes tag.
-- 
pw-bot: cr



[PATCH 06/13] nios2: move pr_debug() about memory start and end to setup_arch()

2025-03-06 Thread Mike Rapoport
From: "Mike Rapoport (Microsoft)" 

This will help with pulling out memblock_free_all() to the generic
code and reducing code duplication in arch::mem_init().

Signed-off-by: Mike Rapoport (Microsoft) 
---
 arch/nios2/kernel/setup.c | 2 ++
 arch/nios2/mm/init.c  | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c
index da122a5fa43b..a4cffbfc1399 100644
--- a/arch/nios2/kernel/setup.c
+++ b/arch/nios2/kernel/setup.c
@@ -149,6 +149,8 @@ void __init setup_arch(char **cmdline_p)
memory_start = memblock_start_of_DRAM();
memory_end = memblock_end_of_DRAM();
 
+   pr_debug("%s: start=%lx, end=%lx\n", __func__, memory_start, 
memory_end);
+
setup_initial_init_mm(_stext, _etext, _edata, _end);
init_task.thread.kregs = &fake_regs;
 
diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c
index a2278485de19..aa692ad30044 100644
--- a/arch/nios2/mm/init.c
+++ b/arch/nios2/mm/init.c
@@ -65,8 +65,6 @@ void __init mem_init(void)
unsigned long end_mem   = memory_end; /* this must not include
kernel stack at top */
 
-   pr_debug("mem_init: start=%lx, end=%lx\n", memory_start, memory_end);
-
end_mem &= PAGE_MASK;
high_memory = __va(end_mem);
 
-- 
2.47.2




[PATCH 01/13] arm: mem_init: use memblock_phys_free() to free DMA memory on SA1111

2025-03-06 Thread Mike Rapoport
From: "Mike Rapoport (Microsoft)" 

This will help to pull out memblock_free_all() to generic code.

Signed-off-by: Mike Rapoport (Microsoft) 
---
 arch/arm/mm/init.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 5345d218899a..9aec1cb2386f 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -277,14 +277,14 @@ void __init mem_init(void)
 
set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
 
-   /* this will put all unused low memory onto the freelists */
-   memblock_free_all();
-
 #ifdef CONFIG_SA
/* now that our DMA memory is actually so designated, we can free it */
-   free_reserved_area(__va(PHYS_OFFSET), swapper_pg_dir, -1, NULL);
+   memblock_phys_free(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET);
 #endif
 
+   /* this will put all unused low memory onto the freelists */
+   memblock_free_all();
+
free_highpages();
 
/*
-- 
2.47.2




[PATCH 11/13] arch, mm: streamline HIGHMEM freeing

2025-03-06 Thread Mike Rapoport
From: "Mike Rapoport (Microsoft)" 

All architectures that support HIGHMEM have their own code that frees high
memory pages to the buddy allocator while __free_memory_core() is limited
to freeing only low memory.

There is no actual reason for that. The memory map is completely ready
by the time memblock_free_all() is called and high pages can be released to
the buddy allocator along with low memory.

Remove low memory limit from __free_memory_core() and drop per-architecture
code that frees high memory pages.

Signed-off-by: Mike Rapoport (Microsoft) 
---
 arch/arc/mm/init.c |  6 +-
 arch/arm/mm/init.c | 29 -
 arch/csky/mm/init.c| 14 --
 arch/microblaze/mm/init.c  | 16 
 arch/mips/mm/init.c| 20 
 arch/powerpc/mm/mem.c  | 14 --
 arch/sparc/mm/init_32.c| 25 -
 arch/x86/include/asm/highmem.h |  3 ---
 arch/x86/include/asm/numa.h|  4 
 arch/x86/include/asm/numa_32.h | 13 -
 arch/x86/mm/Makefile   |  2 --
 arch/x86/mm/highmem_32.c   | 34 --
 arch/x86/mm/init_32.c  | 28 
 arch/xtensa/mm/init.c  | 29 -
 include/linux/mm.h |  1 -
 mm/memblock.c  |  3 +--
 16 files changed, 2 insertions(+), 239 deletions(-)
 delete mode 100644 arch/x86/include/asm/numa_32.h
 delete mode 100644 arch/x86/mm/highmem_32.c

diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 05025122e965..11ce638731c9 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -160,11 +160,7 @@ void __init setup_arch_memory(void)
 static void __init highmem_init(void)
 {
 #ifdef CONFIG_HIGHMEM
-   unsigned long tmp;
-
memblock_phys_free(high_mem_start, high_mem_sz);
-   for (tmp = min_high_pfn; tmp < max_high_pfn; tmp++)
-   free_highmem_page(pfn_to_page(tmp));
 #endif
 }
 
@@ -176,8 +172,8 @@ static void __init highmem_init(void)
  */
 void __init mem_init(void)
 {
-   memblock_free_all();
highmem_init();
+   memblock_free_all();
 
BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE);
BUILD_BUG_ON((PTRS_PER_PUD * sizeof(pud_t)) > PAGE_SIZE);
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index d4bcc745a044..7bb5ce02b9b5 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -237,33 +237,6 @@ static inline void poison_init_mem(void *s, size_t count)
*p++ = 0xe7fddef0;
 }
 
-static void __init free_highpages(void)
-{
-#ifdef CONFIG_HIGHMEM
-   unsigned long max_low = max_low_pfn;
-   phys_addr_t range_start, range_end;
-   u64 i;
-
-   /* set highmem page free */
-   for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
-   &range_start, &range_end, NULL) {
-   unsigned long start = PFN_UP(range_start);
-   unsigned long end = PFN_DOWN(range_end);
-
-   /* Ignore complete lowmem entries */
-   if (end <= max_low)
-   continue;
-
-   /* Truncate partial highmem entries */
-   if (start < max_low)
-   start = max_low;
-
-   for (; start < end; start++)
-   free_highmem_page(pfn_to_page(start));
-   }
-#endif
-}
-
 /*
  * mem_init() marks the free areas in the mem_map and tells us how much
  * memory is free.  This is done after various parts of the system have
@@ -283,8 +256,6 @@ void __init mem_init(void)
/* this will put all unused low memory onto the freelists */
memblock_free_all();
 
-   free_highpages();
-
/*
 * Check boundaries twice: Some fundamental inconsistencies can
 * be detected at build time already.
diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c
index a22801aa503a..3914c2b873da 100644
--- a/arch/csky/mm/init.c
+++ b/arch/csky/mm/init.c
@@ -44,21 +44,7 @@ EXPORT_SYMBOL(empty_zero_page);
 
 void __init mem_init(void)
 {
-#ifdef CONFIG_HIGHMEM
-   unsigned long tmp;
-#endif
-
memblock_free_all();
-
-#ifdef CONFIG_HIGHMEM
-   for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
-   struct page *page = pfn_to_page(tmp);
-
-   /* FIXME not sure about */
-   if (!memblock_is_reserved(tmp << PAGE_SHIFT))
-   free_highmem_page(page);
-   }
-#endif
 }
 
 void free_initmem(void)
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 7e2e342e84c5..3e664e0efc33 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -52,19 +52,6 @@ static void __init highmem_init(void)
map_page(PKMAP_BASE, 0, 0); /* XXX gross */
pkmap_page_table = virt_to_kpte(PKMAP_BASE);
 }
-
-static void __meminit highmem_setup(void)
-{
-   unsigned long pfn;
-
-   

[PATCH 09/13] arch, mm: set max_mapnr when allocating memory map for FLATMEM

2025-03-06 Thread Mike Rapoport
From: "Mike Rapoport (Microsoft)" 

max_mapnr is essentially the size of the memory map for systems that use
FLATMEM. There is no reason to calculate it in each and every architecture
when it's anyway calculated in alloc_node_mem_map().

Drop setting of max_mapnr from architecture code and set it once in
alloc_node_mem_map().

While on it, move definition of mem_map and max_mapnr to mm/mm_init.c so
there won't be two copies for MMU and !MMU variants.

Signed-off-by: Mike Rapoport (Microsoft) 
---
 arch/alpha/mm/init.c   |  1 -
 arch/arc/mm/init.c |  5 -
 arch/arm/mm/init.c |  2 --
 arch/csky/mm/init.c|  4 
 arch/loongarch/mm/init.c   |  1 -
 arch/microblaze/mm/init.c  |  4 
 arch/mips/mm/init.c|  8 
 arch/nios2/kernel/setup.c  |  1 -
 arch/nios2/mm/init.c   |  2 +-
 arch/openrisc/mm/init.c|  1 -
 arch/parisc/mm/init.c  |  1 -
 arch/powerpc/kernel/setup-common.c |  2 --
 arch/riscv/mm/init.c   |  1 -
 arch/s390/mm/init.c|  1 -
 arch/sh/mm/init.c  |  1 -
 arch/sparc/mm/init_32.c|  1 -
 arch/um/include/shared/mem_user.h  |  1 -
 arch/um/kernel/physmem.c   | 12 
 arch/um/kernel/um_arch.c   |  1 -
 arch/x86/mm/init_32.c  |  3 ---
 arch/xtensa/mm/init.c  |  1 -
 include/asm-generic/memory_model.h |  5 +++--
 include/linux/mm.h | 11 ---
 mm/memory.c|  8 
 mm/mm_init.c   | 25 +
 mm/nommu.c |  4 
 26 files changed, 21 insertions(+), 86 deletions(-)

diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index 61c2198b1359..ec0eeae9c653 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -276,7 +276,6 @@ srm_paging_stop (void)
 void __init
 mem_init(void)
 {
-   set_max_mapnr(max_low_pfn);
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
memblock_free_all();
 }
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 6a71b23f1383..7ef883d58dc1 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -154,11 +154,6 @@ void __init setup_arch_memory(void)
 
arch_pfn_offset = min(min_low_pfn, min_high_pfn);
kmap_init();
-
-#else /* CONFIG_HIGHMEM */
-   /* pfn_valid() uses this when FLATMEM=y and HIGHMEM=n */
-   max_mapnr = max_low_pfn - min_low_pfn;
-
 #endif /* CONFIG_HIGHMEM */
 
free_area_init(max_zone_pfn);
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 9aec1cb2386f..d4bcc745a044 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -275,8 +275,6 @@ void __init mem_init(void)
swiotlb_init(max_pfn > arm_dma_pfn_limit, SWIOTLB_VERBOSE);
 #endif
 
-   set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
-
 #ifdef CONFIG_SA
/* now that our DMA memory is actually so designated, we can free it */
memblock_phys_free(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET);
diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c
index ab51acbc19b2..ba6694d6170a 100644
--- a/arch/csky/mm/init.c
+++ b/arch/csky/mm/init.c
@@ -46,10 +46,6 @@ void __init mem_init(void)
 {
 #ifdef CONFIG_HIGHMEM
unsigned long tmp;
-
-   set_max_mapnr(highend_pfn - ARCH_PFN_OFFSET);
-#else
-   set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET);
 #endif
high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
 
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index ca5aa5f46a9f..00449df50db1 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -78,7 +78,6 @@ void __init paging_init(void)
 
 void __init mem_init(void)
 {
-   max_mapnr = max_low_pfn;
high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
 
memblock_free_all();
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 4520c5741579..857cd2b44bcf 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -104,17 +104,13 @@ void __init setup_memory(void)
 *
 * min_low_pfn - the first page (mm/bootmem.c - node_boot_start)
 * max_low_pfn
-* max_mapnr - the first unused page (mm/bootmem.c - node_low_pfn)
 */
 
/* memory start is from the kernel end (aligned) to higher addr */
min_low_pfn = memory_start >> PAGE_SHIFT; /* minimum for allocation */
-   /* RAM is assumed contiguous */
-   max_mapnr = memory_size >> PAGE_SHIFT;
max_low_pfn = ((u64)memory_start + (u64)lowmem_size) >> PAGE_SHIFT;
max_pfn = ((u64)memory_start + (u64)memory_size) >> PAGE_SHIFT;
 
-   pr_info("%s: max_mapnr: %#lx\n", __func__, max_mapnr);
pr_info("%s: min_low_pfn: %#lx\n", __func__, min_low_pfn);
pr_info("%s: max_low_pfn: %#lx\n", __func__, max_low_pfn);
pr_info("%s: max_pfn: %#lx\n", __func__, max_pfn);
diff --git

[PATCH 00/13] arch, mm: reduce code duplication in mem_init()

2025-03-06 Thread Mike Rapoport
From: "Mike Rapoport (Microsoft)" 

Hi,

Every architecture has an implementation of the mem_init() function, and
some have even more than one. All of these release free memory to the buddy
allocator, most of them set high_memory to the end of directly
addressable memory, and many of them set max_mapnr for the FLATMEM case.

These patches pull the commonalities into the generic code and refactor
some of the mem_init() implementations so that many of them can be just
dropped.

Mike Rapoport (Microsoft) (13):
  arm: mem_init: use memblock_phys_free() to free DMA memory on SA
  csky: move setup_initrd() to setup.c
  hexagon: move initialization of init_mm.context init to paging_init()
  MIPS: consolidate mem_init() for NUMA machines
  MIPS: make setup_zero_pages() use memblock
  nios2: move pr_debug() about memory start and end to setup_arch()
  s390: make setup_zero_pages() use memblock
  xtensa: split out printing of virtual memory layout to a function
  arch, mm: set max_mapnr when allocating memory map for FLATMEM
  arch, mm: set high_memory in free_area_init()
  arch, mm: streamline HIGHMEM freeing
  arch, mm: introduce arch_mm_preinit
  arch, mm: make releasing of memory to page allocator more explicit

 arch/alpha/mm/init.c   |  8 
 arch/arc/mm/init.c | 25 +--
 arch/arm/mm/init.c | 43 +--
 arch/arm/mm/mmu.c  |  2 -
 arch/arm/mm/nommu.c|  1 -
 arch/arm64/mm/init.c   | 12 +-
 arch/csky/kernel/setup.c   | 43 +++
 arch/csky/mm/init.c| 67 --
 arch/hexagon/mm/init.c | 32 ++
 arch/loongarch/kernel/numa.c   |  6 ---
 arch/loongarch/mm/init.c   |  8 
 arch/m68k/mm/init.c|  4 --
 arch/m68k/mm/mcfmmu.c  |  1 -
 arch/m68k/mm/motorola.c|  2 -
 arch/m68k/sun3/config.c|  1 -
 arch/microblaze/mm/init.c  | 25 ---
 arch/mips/include/asm/mmzone.h |  2 -
 arch/mips/loongson64/numa.c|  7 
 arch/mips/mm/init.c| 49 --
 arch/mips/sgi-ip27/ip27-memory.c   |  9 
 arch/nios2/kernel/setup.c  |  3 +-
 arch/nios2/mm/init.c   | 16 +--
 arch/openrisc/mm/init.c|  6 ---
 arch/parisc/mm/init.c  |  4 --
 arch/powerpc/kernel/setup-common.c |  3 --
 arch/powerpc/mm/mem.c  | 18 +---
 arch/riscv/mm/init.c   |  5 +--
 arch/s390/mm/init.c| 18 +---
 arch/sh/mm/init.c  | 10 -
 arch/sparc/mm/init_32.c| 31 +-
 arch/sparc/mm/init_64.c|  4 --
 arch/um/include/shared/mem_user.h  |  1 -
 arch/um/kernel/mem.c   |  9 ++--
 arch/um/kernel/physmem.c   | 12 --
 arch/um/kernel/um_arch.c   |  2 -
 arch/x86/include/asm/highmem.h |  3 --
 arch/x86/include/asm/numa.h|  4 --
 arch/x86/include/asm/numa_32.h | 13 --
 arch/x86/kernel/setup.c|  2 -
 arch/x86/mm/Makefile   |  2 -
 arch/x86/mm/highmem_32.c   | 34 ---
 arch/x86/mm/init_32.c  | 41 ++
 arch/x86/mm/init_64.c  |  7 ++--
 arch/x86/mm/numa_32.c  |  3 --
 arch/xtensa/mm/init.c  | 66 +++--
 include/asm-generic/memory_model.h |  5 ++-
 include/linux/memblock.h   |  1 -
 include/linux/mm.h | 13 +-
 mm/internal.h  |  3 +-
 mm/memblock.c  |  3 +-
 mm/memory.c| 16 ---
 mm/mm_init.c   | 58 ++
 mm/nommu.c |  6 ---
 53 files changed, 151 insertions(+), 618 deletions(-)
 delete mode 100644 arch/x86/include/asm/numa_32.h
 delete mode 100644 arch/x86/mm/highmem_32.c


base-commit: d082ecbc71e9e0bf49883ee4afd435a77a5101b6
-- 
2.47.2




[PATCH 07/13] s390: make setup_zero_pages() use memblock

2025-03-06 Thread Mike Rapoport
From: "Mike Rapoport (Microsoft)" 

Allocating the zero pages from memblock is simpler because the memory is
already reserved.

This will also help with pulling out memblock_free_all() to the generic
code and reducing code duplication in arch::mem_init().

Signed-off-by: Mike Rapoport (Microsoft) 
---
 arch/s390/mm/init.c | 14 +++---
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index f2298f7a3f21..020aa2f78d01 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -73,8 +73,6 @@ static void __init setup_zero_pages(void)
 {
unsigned long total_pages = memblock_estimated_nr_free_pages();
unsigned int order;
-   struct page *page;
-   int i;
 
/* Latest machines require a mapping granularity of 512KB */
order = 7;
@@ -83,17 +81,10 @@ static void __init setup_zero_pages(void)
while (order > 2 && (total_pages >> 10) < (1UL << order))
order--;
 
-   empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+   empty_zero_page = (unsigned long)memblock_alloc(PAGE_SIZE << order, 
order);
if (!empty_zero_page)
panic("Out of memory in setup_zero_pages");
 
-   page = virt_to_page((void *) empty_zero_page);
-   split_page(page, order);
-   for (i = 1 << order; i > 0; i--) {
-   mark_page_reserved(page);
-   page++;
-   }
-
zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
 }
 
@@ -176,9 +167,10 @@ void __init mem_init(void)
pv_init();
kfence_split_mapping();
 
+   setup_zero_pages(); /* Setup zeroed pages. */
+
/* this will put all low memory onto the freelists */
memblock_free_all();
-   setup_zero_pages(); /* Setup zeroed pages. */
 }
 
 unsigned long memory_block_size_bytes(void)
-- 
2.47.2




Re: [PATCH 00/13] arch, mm: reduce code duplication in mem_init()

2025-03-06 Thread Andrew Morton
On Thu,  6 Mar 2025 20:51:10 +0200 Mike Rapoport  wrote:

> Every architecture has implementation of mem_init() function and some
> even more than one. All these release free memory to the buddy
> allocator, most of them set high_memory to the end of directly
> addressable memory and many of them set max_mapnr for FLATMEM case.
> 
> These patches pull the commonalities into the generic code and refactor
> some of the mem_init() implementations so that many of them can be just
> dropped.

Thanks, I added this series to mm.git.



Re: [EXTERNAL] Re: [PATCH v4 1/2] watchdog: Add a new flag WDIOF_STOP_MAYSLEEP

2025-03-06 Thread Guenter Roeck
On Thu, Mar 06, 2025 at 12:18:20PM +, George Cherian wrote:
> Hi Guenter,
> 
> I am summarizing the topics we discussed in multiple threads here.
> 
> >>On 3/5/25 03:01, Ahmad Fatoum wrote:
> >> Hi George,
> >> Hi Guenter,
> >> 
> >> On 05.03.25 11:34, George Cherian wrote:
>  why is armada_37xx_wdt also here?
>  The stop function in that driver may not sleep.
> >>> Marek,
> >>>
> >>> Thanks for reviewing.
> >>> Since the stop function has a regmap_write(), I thought it might sleep.
> >>> Now that you pointed it out, I assume that it is an MMIO based regmap 
> >>> being used for armada.
> >>> I will update the same in the next version
> >> 
> >> Failure to add WDIOF_STOP_MAYSLEEP when it's needed can lead to
> >> kernel hanging. Failure to add an alternative WDIOF_STOP_ATOMIC
> >> would lead to the kernel option being a no-op.
> >> 
> >> I think a no-op stop_on_panic (or reset_on_panic) is a saner default.
> >> 
> >
> >Agreed. Also, I like WDIOF_STOP_ATOMIC more than the WDIOF_STOP_NOSLEEP
> >I had suggested in my other response.
> 
> 1. Instead of blacklisting drivers as WDIOF_STOP_MAYSLEEP, the option will be
> an opt-in.
> 2. This may not be WDIOF_STOP_ATOMIC; instead it would be a generic flag not
> limited to the STOP operation. Maybe WDIOF_OPS_ATOMIC (OPS include .start,
> .stop, .set_timeout, .ping).

I don't see a value in this because AFAICS atomic operation is only needed when
stopping the watchdog. At least in theory, some watchdogs might need to sleep
for other functions, but not for the stop operation. Please provide a rationale.

> 3. Remove the kernel command line option (stop_on_panic) and have a generic 
> reset_on_panic.
> 4. reset_on_panic=60 (by default )  meaning on a panic the wdog timeout is 
> updated to 60sec
>  or the clamp_t(reset_on_panic, min, max_hw_heartbeat_ms).

Default should be the current behavior, that the watchdog keeps running with the
configured timeout.

> 5. if reset_on_panic=0, it means the watchdog is stopped on panic.

If we need both a panic timeout and the ability to disable the watchdog entirely
on panic, there should be two parameters - one to select the watchdog timeout
on panic, and one to disable the watchdog entirely on panic. If there is only
one parameter, it should be the watchdog timeout on panic, with ==0 meaning
"keep the configured timeout" (i.e., the current behavior).

Thanks,
Guenter



Re: [PATCH v5 2/3] printf: break kunit into test cases

2025-03-06 Thread Tamir Duberstein
On Thu, Mar 6, 2025 at 11:44 AM Petr Mladek  wrote:
>
> On Fri 2025-02-21 15:34:31, Tamir Duberstein wrote:
> > Move all tests into `printf_test_cases`. This gives us nicer output in
> > the event of a failure.
> >
> > Combine `plain_format` and `plain_hash` into `hash_pointer` since
> > they're testing the same scenario.
> >
> > Signed-off-by: Tamir Duberstein 
> > ---
> >  lib/tests/printf_kunit.c | 331 
> > +--
> >  1 file changed, 121 insertions(+), 210 deletions(-)
> >
> > diff --git a/lib/tests/printf_kunit.c b/lib/tests/printf_kunit.c
> > index 287bbfb61148..013df6f6dd49 100644
> > --- a/lib/tests/printf_kunit.c
> > +++ b/lib/tests/printf_kunit.c
> > @@ -38,13 +38,8 @@ static unsigned int total_tests;
> >  static char *test_buffer;
> >  static char *alloced_buffer;
> >
> > -static struct kunit *kunittest;
> > -
> > -#define tc_fail(fmt, ...) \
> > - KUNIT_FAIL(kunittest, fmt, ##__VA_ARGS__)
> > -
> > -static void __printf(4, 0)
> > -do_test(int bufsize, const char *expect, int elen,
> > +static void __printf(5, 0)
> > +do_test(struct kunit *kunittest, int bufsize, const char *expect, int elen,
> >   const char *fmt, va_list ap)
> >  {
> >   va_list aq;
> > @@ -58,59 +53,64 @@ do_test(int bufsize, const char *expect, int elen,
> [...]
> >
> >   if (memcmp(test_buffer, expect, written)) {
> > - tc_fail("vsnprintf(buf, %d, \"%s\", ...) wrote '%s', expected 
> > '%.*s'",
> > - bufsize, fmt, test_buffer, written, expect);
> > + KUNIT_FAIL(kunittest, "vsnprintf(buf, %d, \"%s\", ...) wrote 
> > '%s', expected '%.*s'",
> > +bufsize, fmt, test_buffer, written, expect);
> >   return;
> >   }
> >  }
> >
> > -static void __printf(3, 4)
> > -__test(const char *expect, int elen, const char *fmt, ...)
> > +static void __printf(4, 0)
>
> This should be:
>
> static void __printf(4, 5)
>
> The 2nd parameter is zero when the variable list of parameters is
> passed using va_list.

Yeah, thanks for the catch. I fixed this locally after you observed
the same on the scanf-kunit series.

> > +__test(struct kunit *kunittest, const char *expect, int elen, const char 
> > *fmt, ...)
> >  {
> >   va_list ap;
> >   int rand;
> >   char *p;
>
> > @@ -247,89 +225,44 @@ plain_format(void)
> >  #define ZEROS ""
> >  #define ONES ""
> >
> > -static int
> > -plain_format(void)
> > -{
> > - /* Format is implicitly tested for 32 bit machines by plain_hash() */
> > - return 0;
> > -}
> > -
> >  #endif   /* BITS_PER_LONG == 64 */
> >
> > -static int
> > -plain_hash_to_buffer(const void *p, char *buf, size_t len)
> > +static void
> > +plain_hash_to_buffer(struct kunit *kunittest, const void *p, char *buf, 
> > size_t len)
> >  {
> > - int nchars;
> > -
> > - nchars = snprintf(buf, len, "%p", p);
> > -
> > - if (nchars != PTR_WIDTH)
> > - return -1;
> > + KUNIT_ASSERT_EQ(kunittest, snprintf(buf, len, "%p", p), PTR_WIDTH);
> >
> >   if (strncmp(buf, PTR_VAL_NO_CRNG, PTR_WIDTH) == 0) {
> >   kunit_warn(kunittest, "crng possibly not yet initialized. 
> > plain 'p' buffer contains \"%s\"",
> >   PTR_VAL_NO_CRNG);
> > - return 0;
> >   }
> > -
> > - return 0;
> >  }
> >
> > -static int
> > -plain_hash(void)
> > -{
> > - char buf[PLAIN_BUF_SIZE];
> > - int ret;
> > -
> > - ret = plain_hash_to_buffer(PTR, buf, PLAIN_BUF_SIZE);
> > - if (ret)
> > - return ret;
> > -
> > - if (strncmp(buf, PTR_STR, PTR_WIDTH) == 0)
> > - return -1;
> > -
> > - return 0;
> > -}
> > -
> > -/*
> > - * We can't use test() to test %p because we don't know what output to 
> > expect
> > - * after an address is hashed.
> > - */
> >  static void
> > -plain(void)
> > +hash_pointer(struct kunit *kunittest)
> >  {
> > - int err;
> > + if (no_hash_pointers)
> > + kunit_skip(kunittest, "hash pointers disabled");
> >
> > - if (no_hash_pointers) {
> > - kunit_warn(kunittest, "skipping plain 'p' tests");
> > - return;
> > - }
> > + char buf[PLAIN_BUF_SIZE];
> >
> > - err = plain_hash();
> > - if (err) {
> > - tc_fail("plain 'p' does not appear to be hashed");
> > - return;
> > - }
> > + plain_hash_to_buffer(kunittest, PTR, buf, PLAIN_BUF_SIZE);
> >
> > - err = plain_format();
> > - if (err) {
> > - tc_fail("hashing plain 'p' has unexpected format");
> > - }
> > + /*
> > +  * We can't use test() to test %p because we don't know what output 
> > to expect
> > +  * after an address is hashed.
> > +  */
>
> The code no longer prints a reasonable error message on failure.
> I would extend the comment to make it easier to understand the
> meaning. Also I would use the imperative style. Something like:
>
> /*
>  * The hash of %p is unpredi