[PATCH v3 02/14] include/linux/compiler.h: prefer __section from compiler_attributes.h
GCC unescapes escaped string section names while Clang does not. Because __section uses the `#` stringification operator for the section name, it doesn't need to be escaped. This fixes an Oops observed in distro's that use systemd and not net.core.bpf_jit_enable=1, when their kernels are compiled with Clang. Instead, we should: 1. Prefer __section(.section_name_no_quotes). 2. Only use __attribute__((__section__(".section"))) when creating the section name via C preprocessor (see the definition of __define_initcall in arch/um/include/shared/init.h). This antipattern was found with: $ grep -e __section\(\" -e __section__\(\" -r See the discussions in: Link: https://bugs.llvm.org/show_bug.cgi?id=42950 Link: https://marc.info/?l=linux-netdev&m=156412960619946&w=2 Link: https://github.com/ClangBuiltLinux/linux/issues/619 Acked-by: Will Deacon Reported-by: Sedat Dilek Suggested-by: Josh Poimboeuf Tested-by: Sedat Dilek Signed-off-by: Nick Desaulniers --- include/linux/compiler.h | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index f0fd5636fddb..5e88e7e33abe 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -24,7 +24,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, long __r; \ static struct ftrace_likely_data\ __aligned(4)\ - __section("_ftrace_annotated_branch") \ + __section(_ftrace_annotated_branch) \ __f = { \ .data.func = __func__, \ .data.file = __FILE__, \ @@ -60,7 +60,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #define __trace_if_value(cond) ({ \ static struct ftrace_branch_data\ __aligned(4)\ - __section("_ftrace_branch") \ + __section(_ftrace_branch) \ __if_trace = { \ .func = __func__, \ .file = __FILE__, \ @@ -118,7 +118,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, ".popsection\n\t" /* Annotate a C jump table to allow objtool to follow the code flow */ -#define __annotate_jump_table __section(".rodata..c_jump_table") +#define __annotate_jump_table __section(.rodata..c_jump_table) #else #define annotate_reachable() @@ -298,7 +298,7 @@ unsigned long read_word_at_a_time(const void *addr) * visible to the compiler. */ #define __ADDRESSABLE(sym) \ - static void * __section(".discard.addressable") __used \ + static void * __section(.discard.addressable) __used \ __PASTE(__addressable_##sym, __LINE__) = (void *)&sym; /** -- 2.23.0.187.g17f5b7556c-goog
[PATCH v3 11/14] include/asm-generic: prefer __section from compiler_attributes.h
GCC unescapes escaped string section names while Clang does not. Because __section uses the `#` stringification operator for the section name, it doesn't need to be escaped. Instead, we should: 1. Prefer __section(.section_name_no_quotes). 2. Only use __attribute__((__section__(".section"))) when creating the section name via C preprocessor (see the definition of __define_initcall in arch/um/include/shared/init.h). This antipattern was found with: $ grep -e __section\(\" -e __section__\(\" -r See the discussions in: Link: https://bugs.llvm.org/show_bug.cgi?id=42950 Link: https://marc.info/?l=linux-netdev&m=156412960619946&w=2 Link: https://github.com/ClangBuiltLinux/linux/issues/619 Acked-by: Naveen N. Rao Reported-by: Sedat Dilek Suggested-by: Josh Poimboeuf Tested-by: Sedat Dilek Signed-off-by: Nick Desaulniers --- include/asm-generic/error-injection.h | 2 +- include/asm-generic/kprobes.h | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/include/asm-generic/error-injection.h b/include/asm-generic/error-injection.h index 95a159a4137f..a593a50b33e3 100644 --- a/include/asm-generic/error-injection.h +++ b/include/asm-generic/error-injection.h @@ -23,7 +23,7 @@ struct error_injection_entry { */ #define ALLOW_ERROR_INJECTION(fname, _etype) \ static struct error_injection_entry __used \ - __attribute__((__section__("_error_injection_whitelist"))) \ + __section(_error_injection_whitelist) \ _eil_addr_##fname = { \ .addr = (unsigned long)fname, \ .etype = EI_ETYPE_##_etype, \ diff --git a/include/asm-generic/kprobes.h b/include/asm-generic/kprobes.h index 4a982089c95c..20d69719270f 100644 --- a/include/asm-generic/kprobes.h +++ b/include/asm-generic/kprobes.h @@ -9,12 +9,11 @@ * by using this macro. */ # define __NOKPROBE_SYMBOL(fname) \ -static unsigned long __used\ - __attribute__((__section__("_kprobe_blacklist"))) \ +static unsigned long __used __section(_kprobe_blacklist) \ _kbl_addr_##fname = (unsigned long)fname; # define NOKPROBE_SYMBOL(fname)__NOKPROBE_SYMBOL(fname) /* Use this to forbid a kprobes attach on very low level functions */ -# define __kprobes __attribute__((__section__(".kprobes.text"))) +# define __kprobes __section(.kprobes.text) # define nokprobe_inline __always_inline #else # define NOKPROBE_SYMBOL(fname) -- 2.23.0.187.g17f5b7556c-goog
[PATCH v3 12/14] include/linux: prefer __section from compiler_attributes.h
GCC unescapes escaped string section names while Clang does not. Because __section uses the `#` stringification operator for the section name, it doesn't need to be escaped. Instead, we should: 1. Prefer __section(.section_name_no_quotes). 2. Only use __attribute__((__section__(".section"))) when creating the section name via C preprocessor (see the definition of __define_initcall in arch/um/include/shared/init.h). This antipattern was found with: $ grep -e __section\(\" -e __section__\(\" -r See the discussions in: Link: https://bugs.llvm.org/show_bug.cgi?id=42950 Link: https://marc.info/?l=linux-netdev&m=156412960619946&w=2 Link: https://github.com/ClangBuiltLinux/linux/issues/619 Acked-by: Will Deacon Reported-by: Sedat Dilek Suggested-by: Josh Poimboeuf Tested-by: Sedat Dilek Signed-off-by: Nick Desaulniers --- include/linux/cache.h | 6 +++--- include/linux/cpu.h | 2 +- include/linux/export.h | 2 +- include/linux/init_task.h | 4 ++-- include/linux/interrupt.h | 5 ++--- include/linux/sched/debug.h | 2 +- include/linux/srcutree.h| 2 +- 7 files changed, 11 insertions(+), 12 deletions(-) diff --git a/include/linux/cache.h b/include/linux/cache.h index 750621e41d1c..3f4df9eef1e1 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -28,7 +28,7 @@ * but may get written to during init, so can't live in .rodata (via "const"). */ #ifndef __ro_after_init -#define __ro_after_init __attribute__((__section__(".data..ro_after_init"))) +#define __ro_after_init __section(.data..ro_after_init) #endif #ifndef cacheline_aligned @@ -45,8 +45,8 @@ #ifndef __cacheline_aligned #define __cacheline_aligned\ - __attribute__((__aligned__(SMP_CACHE_BYTES), \ -__section__(".data..cacheline_aligned"))) + __aligned(SMP_CACHE_BYTES) \ + __section(.data..cacheline_aligned) #endif /* __cacheline_aligned */ #ifndef __cacheline_aligned_in_smp diff --git a/include/linux/cpu.h b/include/linux/cpu.h index fcb1386bb0d4..186bbd79d6ce 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -166,7 +166,7 @@ void cpu_startup_entry(enum cpuhp_state state); void cpu_idle_poll_ctrl(bool enable); /* Attach to any functions which should be considered cpuidle. */ -#define __cpuidle __attribute__((__section__(".cpuidle.text"))) +#define __cpuidle __section(.cpuidle.text) bool cpu_in_idle(unsigned long pc); diff --git a/include/linux/export.h b/include/linux/export.h index fd8711ed9ac4..808c1a0c2ef9 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -104,7 +104,7 @@ struct kernel_symbol { * discarded in the final link stage. */ #define __ksym_marker(sym) \ - static int __ksym_marker_##sym[0] __section(".discard.ksym") __used + static int __ksym_marker_##sym[0] __section(.discard.ksym) __used #define __EXPORT_SYMBOL(sym, sec) \ __ksym_marker(sym); \ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 6049baa5b8bc..50139505da34 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -51,12 +51,12 @@ extern struct cred init_cred; /* Attach to the init_task data structure for proper alignment */ #ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK -#define __init_task_data __attribute__((__section__(".data..init_task"))) +#define __init_task_data __section(.data..init_task) #else #define __init_task_data /**/ #endif /* Attach to the thread_info data structure for proper alignment */ -#define __init_thread_info __attribute__((__section__(".data..init_thread_info"))) +#define __init_thread_info __section(.data..init_thread_info) #endif diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5b8328a99b2a..29debfe4dd0f 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -741,8 +741,7 @@ extern int arch_early_irq_init(void); /* * We want to know which function is an entrypoint of a hardirq or a softirq. */ -#define __irq_entry __attribute__((__section__(".irqentry.text"))) -#define __softirq_entry \ - __attribute__((__section__(".softirqentry.text"))) +#define __irq_entry__section(.irqentry.text) +#define __softirq_entry__section(.softirqentry.text) #endif diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h index 95fb9e025247..e17b66221fdd 100644 --- a/include/linux/sched/debug.h +++ b/include/linux/sched/debug.h @@ -42,7 +42,7 @@ extern void proc_sched_set_task(struct task_struct *p); #endif /* Attach to any functions which should be ignored in wchan output. */ -#define __sched__attribute__((__section__(".sched.text"))) +#define __sched__section(.sched.text) /* Linker adds these: start and end of __sched functions */ extern char __sched_text_start[], __sched_text_end[
[PATCH v3 03/14] parisc: prefer __section from compiler_attributes.h
GCC unescapes escaped string section names while Clang does not. Because __section uses the `#` stringification operator for the section name, it doesn't need to be escaped. Instead, we should: 1. Prefer __section(.section_name_no_quotes). 2. Only use __attribute__((__section__(".section"))) when creating the section name via C preprocessor (see the definition of __define_initcall in arch/um/include/shared/init.h). This antipattern was found with: $ grep -e __section\(\" -e __section__\(\" -r See the discussions in: Link: https://bugs.llvm.org/show_bug.cgi?id=42950 Link: https://marc.info/?l=linux-netdev&m=156412960619946&w=2 Link: https://github.com/ClangBuiltLinux/linux/issues/619 Reported-by: Sedat Dilek Suggested-by: Josh Poimboeuf Signed-off-by: Nick Desaulniers --- arch/parisc/include/asm/cache.h | 2 +- arch/parisc/include/asm/ldcw.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index 73ca89a47f49..e5de3f897633 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -22,7 +22,7 @@ #define ARCH_DMA_MINALIGN L1_CACHE_BYTES -#define __read_mostly __attribute__((__section__(".data..read_mostly"))) +#define __read_mostly __section(.data..read_mostly) void parisc_cache_init(void); /* initializes cache-flushing */ void disable_sr_hashing_asm(int); /* low level support for above */ diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index 3eb4bfc1fb36..e080143e79a3 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -52,7 +52,7 @@ }) #ifdef CONFIG_SMP -# define __lock_aligned __attribute__((__section__(".data..lock_aligned"))) +# define __lock_aligned __section(.data..lock_aligned) #endif #endif /* __PARISC_LDCW_H */ -- 2.23.0.187.g17f5b7556c-goog
Re: mmotm 2019-08-27-20-39 uploaded (sound/hda/intel-nhlt.c)
On 8/28/19 3:45 PM, Pierre-Louis Bossart wrote: > >>>> I just checked with Mark Brown's for-next tree >>>> 8aceffa09b4b9867153bfe0ff6f40517240cee12 >>>> and things are fine in i386 mode, see below. >>>> >>>> next-20190828 also works fine for me in i386 mode. >>>> >>>> if you can point me to a tree and configuration that don't work I'll look >>>> into this, I'd need more info to progress. >>> >>> Please try the attached randconfig file. >>> >>> Thanks for looking. >> >> Ack, I see some errors as well with this config. Likely a missing dependency >> somewhere, working on this now. > > My bad, I added a fallback with static inline functions in the .h file when > ACPI is not defined, but the .c file was still compiled. > > The diff below makes next-20190828 compile with Randy's config. > > It looks like the alsa-devel server is down btw? > > diff --git a/sound/hda/Makefile b/sound/hda/Makefile > index 8560f6ef1b19..b3af071ce06b 100644 > --- a/sound/hda/Makefile > +++ b/sound/hda/Makefile > @@ -14,5 +14,7 @@ obj-$(CONFIG_SND_HDA_CORE) += snd-hda-core.o > #extended hda > obj-$(CONFIG_SND_HDA_EXT_CORE) += ext/ > > +ifdef CONFIG_ACPI > snd-intel-nhlt-objs := intel-nhlt.o > obj-$(CONFIG_SND_INTEL_NHLT) += snd-intel-nhlt.o > +endif > works for me. Thanks. Acked-by: Randy Dunlap # build-tested -- ~Randy
Re: [PATCH net-next 05/15] net: sgi: ioc3-eth: allocate space for desc rings only once
On Wed, 28 Aug 2019 16:03:04 +0200, Thomas Bogendoerfer wrote: > Memory for descriptor rings are allocated/freed, when interface is > brought up/down. Since the size of the rings is not changeable by > hardware, we now allocate rings now during probe and free it, when > device is removed. > > Signed-off-by: Thomas Bogendoerfer So the rings still get freed and allocated from ioc3_init() but there's a set allocated from the start? I guess that makes some sense.. Most drivers will allocate rings in open() and free them in close().
Re: [PATCH net-next] net/ncsi: add response handlers for PLDM over NC-SI
From: Ben Wei Date: Tue, 27 Aug 2019 23:03:53 + > This patch adds handlers for PLDM over NC-SI command response. > > This enables NC-SI driver recognizes the packet type so the responses don't > get dropped as unknown packet type. > > PLDM over NC-SI are not handled in kernel driver for now, but can be passed > back to user space via Netlink for further handling. > > Signed-off-by: Ben Wei I don't know why but patchwork puts part of your patch into the commit message, see: https://patchwork.ozlabs.org/patch/1154104/ It's probably an encoding issue or similar. > +static int ncsi_rsp_handler_pldm(struct ncsi_request *nr) { > + return 0; > +} > + > static int ncsi_rsp_handler_netlink(struct ncsi_request *nr) { I know other functions in this file do it, but please put the openning curly braces of a function on a separate line. Thank you.
Re: [PATCH net-next 06/15] net: sgi: ioc3-eth: get rid of ioc3_clean_rx_ring()
On Wed, 28 Aug 2019 16:03:05 +0200, Thomas Bogendoerfer wrote: > Clean rx ring is just called once after a new ring is allocated, which > is per definition clean. So there is not need for this function. > > Signed-off-by: Thomas Bogendoerfer > --- > drivers/net/ethernet/sgi/ioc3-eth.c | 21 - > 1 file changed, 21 deletions(-) > > diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c > b/drivers/net/ethernet/sgi/ioc3-eth.c > index 6ca560d4ab79..39631e067b71 100644 > --- a/drivers/net/ethernet/sgi/ioc3-eth.c > +++ b/drivers/net/ethernet/sgi/ioc3-eth.c > @@ -761,26 +761,6 @@ static void ioc3_mii_start(struct ioc3_private *ip) > add_timer(&ip->ioc3_timer); > } > > -static inline void ioc3_clean_rx_ring(struct ioc3_private *ip) > -{ > - struct ioc3_erxbuf *rxb; > - struct sk_buff *skb; > - int i; > - > - for (i = ip->rx_ci; i & 15; i++) { > - ip->rx_skbs[ip->rx_pi] = ip->rx_skbs[ip->rx_ci]; > - ip->rxr[ip->rx_pi++] = ip->rxr[ip->rx_ci++]; > - } > - ip->rx_pi &= RX_RING_MASK; > - ip->rx_ci &= RX_RING_MASK; > - > - for (i = ip->rx_ci; i != ip->rx_pi; i = (i + 1) & RX_RING_MASK) { > - skb = ip->rx_skbs[i]; > - rxb = (struct ioc3_erxbuf *)(skb->data - RX_OFFSET); > - rxb->w0 = 0; There's gotta be some purpose to setting this w0 word to zero no? ioc3_rx() uses that to see if the descriptor is done, and dutifully clears it after.. > - } > -} > - > static inline void ioc3_clean_tx_ring(struct ioc3_private *ip) > { > struct sk_buff *skb; > @@ -860,7 +840,6 @@ static void ioc3_init_rings(struct net_device *dev) > ioc3_free_rings(ip); > ioc3_alloc_rings(dev); > > - ioc3_clean_rx_ring(ip); > ioc3_clean_tx_ring(ip); > > /* Now the rx ring base, consume & produce registers. */
Re: [PATCH net v3 0/2] r8152: fix side effect
From: Hayes Wang Date: Wed, 28 Aug 2019 09:51:40 +0800 > v3: > Update the commit message for patch #1. > > v2: > Replace patch #2 with "r8152: remove calling netif_napi_del". > > v1: > The commit 0ee1f4734967 ("r8152: napi hangup fix after disconnect") > add a check to avoid using napi_disable after netif_napi_del. However, > the commit ffa9fec30ca0 ("r8152: set RTL8152_UNPLUG only for real > disconnection") let the check useless. > > Therefore, I revert commit 0ee1f4734967 ("r8152: napi hangup fix > after disconnect") first, and add another patch to fix it. Series applied, thank you.
Re: [PATCH] x86/mm/cpa: Prevent large page split when ftrace flips RW on kernel text
> On Aug 28, 2019, at 3:31 PM, Thomas Gleixner wrote: > > ftrace does not use text_poke() for enabling trace functionality. It uses > its own mechanism and flips the whole kernel text to RW and back to RO. > > The CPA rework removed a loop based check of 4k pages which tried to > preserve a large page by checking each 4k page whether the change would > actually cover all pages in the large page. > > This resulted in endless loops for nothing as in testing it turned out that > it actually never preserved anything. Of course testing missed to include > ftrace, which is the one and only case which benefitted from the 4k loop. > > As a consequence enabling function tracing or ftrace based kprobes results > in a full 4k split of the kernel text, which affects iTLB performance. > > The kernel RO protection is the only valid case where this can actually > preserve large pages. > > All other static protections (RO data, data NX, PCI, BIOS) are truly > static. So a conflict with those protections which results in a split > should only ever happen when a change of memory next to a protected region > is attempted. But these conflicts are rightfully splitting the large page > to preserve the protected regions. In fact a change to the protected > regions itself is a bug and is warned about. > > Add an exception for the static protection check for kernel text RO when > the to be changed region spawns a full large page which allows to preserve > the large mappings. This also prevents the syslog to be spammed about CPA > violations when ftrace is used. > > The exception needs to be removed once ftrace switched over to text_poke() > which avoids the whole issue. > > Fixes: 585948f4f695 ("x86/mm/cpa: Avoid the 4k pages check completely") > Reported-by: Song Liu > Signed-off-by: Thomas Gleixner > Cc: sta...@vger.kernel.org This looks great. Much cleaner than my workaround. Thanks! Reviewed-and-tested-by: Song Liu We need this for v4.20 to v5.3 (assuming Peter's patches will land in 5.4). > --- > arch/x86/mm/pageattr.c | 26 ++ > 1 file changed, 18 insertions(+), 8 deletions(-) > > --- a/arch/x86/mm/pageattr.c > +++ b/arch/x86/mm/pageattr.c > @@ -516,7 +516,7 @@ static inline void check_conflict(int wa > */ > static inline pgprot_t static_protections(pgprot_t prot, unsigned long start, > unsigned long pfn, unsigned long npg, > - int warnlvl) > + unsigned long lpsize, int warnlvl) > { > pgprotval_t forbidden, res; > unsigned long end; > @@ -535,9 +535,17 @@ static inline pgprot_t static_protection > check_conflict(warnlvl, prot, res, start, end, pfn, "Text NX"); > forbidden = res; > > - res = protect_kernel_text_ro(start, end); > - check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO"); > - forbidden |= res; > + /* > + * Special case to preserve a large page. If the change spawns the > + * full large page mapping then there is no point to split it > + * up. Happens with ftrace and is going to be removed once ftrace > + * switched to text_poke(). > + */ > + if (lpsize != (npg * PAGE_SIZE) || (start & (lpsize - 1))) { > + res = protect_kernel_text_ro(start, end); > + check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO"); > + forbidden |= res; > + } > > /* Check the PFN directly */ > res = protect_pci_bios(pfn, pfn + npg - 1); > @@ -819,7 +827,7 @@ static int __should_split_large_page(pte >* extra conditional required here. >*/ > chk_prot = static_protections(old_prot, lpaddr, old_pfn, numpages, > - CPA_CONFLICT); > + psize, CPA_CONFLICT); > > if (WARN_ON_ONCE(pgprot_val(chk_prot) != pgprot_val(old_prot))) { > /* > @@ -855,7 +863,7 @@ static int __should_split_large_page(pte >* protection requirement in the large page. >*/ > new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages, > - CPA_DETECT); > + psize, CPA_DETECT); > > /* >* If there is a conflict, split the large page. > @@ -906,7 +914,8 @@ static void split_set_pte(struct cpa_dat > if (!cpa->force_static_prot) > goto set; > > - prot = static_protections(ref_prot, address, pfn, npg, CPA_PROTECT); > + /* Hand in lpsize = 0 to enforce the protection mechanism */ > + prot = static_protections(ref_prot, address, pfn, npg, 0, CPA_PROTECT); > > if (pgprot_val(prot) == pgprot_val(ref_prot)) > goto set; > @@ -1503,7 +1512,8 @@ static int __change_page_attr(struct cpa > pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); > > cpa_inc_4k_install(); > - new_prot = stati
Re: [PATCH net-next 09/15] net: sgi: ioc3-eth: split ring cleaning/freeing and allocation
On Wed, 28 Aug 2019 16:03:08 +0200, Thomas Bogendoerfer wrote: > Do tx ring cleaning and freeing of rx buffers, when chip is shutdown and > allocate buffers before bringing chip up. > > Signed-off-by: Thomas Bogendoerfer Ah, so you are moving the alloc/free to open/close, that's good.
Re: [PATCH] sky2: Disable MSI on yet another ASUS boards (P6Xxxx)
From: Takashi Iwai Date: Wed, 28 Aug 2019 08:31:19 +0200 > A similar workaround for the suspend/resume problem is needed for yet > another ASUS machines, P6X models. Like the previous fix, the BIOS > doesn't provide the standard DMI_SYS_* entry, so again DMI_BOARD_* > entries are used instead. > > Reported-and-tested-by: SteveM > Signed-off-by: Takashi Iwai Applied, but this is getting suspicious. It looks like MSI generally is not restored properly on resume on these boards, so maybe there simply needs to be a generic PCI quirk for that?
Re: [PATCH v2] kbuild: enable unused-function warnings for W= build with Clang
On Tue, Aug 27, 2019 at 7:58 PM Masahiro Yamada wrote: > On Wed, Aug 28, 2019 at 6:56 AM Nick Desaulniers > wrote: > > Masahiro, does your patch correctly make -Wunused-function work for > > clang at W=1? It looks like -Wunused gets added to warning-1, but > > then -Wno-unused-function gets added to KBUILD_CFLAGS after `warning` > > does. Will that work correctly? I'd imagine that at W=1, > > KBUILD_CFLAGS for clang will look like: > > ... -Wunused -Wno-unused-function ... > > which is probably not what we want? > > Hmm? > > -Wunused is added only when W=1. > > -Wno-unused-function is added only when W= was not passed. > > They do not happen at the same time. Acked-by: Nick Desaulniers -- Thanks, ~Nick Desaulniers
[PATCH][V2] arcnet: capmode: remove redundant assignment to pointer pkt
From: Colin Ian King Pointer pkt is being initialized with a value that is never read and pkt is being re-assigned a little later on. The assignment is redundant and hence can be removed. Addresses-Coverity: ("Ununsed value") Signed-off-by: Colin Ian King --- V2: fix typo in patch description, pkg -> pkt --- drivers/net/arcnet/capmode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c index b780be6f41ff..c09b567845e1 100644 --- a/drivers/net/arcnet/capmode.c +++ b/drivers/net/arcnet/capmode.c @@ -44,7 +44,7 @@ static void rx(struct net_device *dev, int bufnum, { struct arcnet_local *lp = netdev_priv(dev); struct sk_buff *skb; - struct archdr *pkt = pkthdr; + struct archdr *pkt; char *pktbuf, *pkthdrbuf; int ofs; -- 2.20.1
Re: [PATCH net v4 0/2] r8152: fix side effect
From: Hayes Wang Date: Wed, 28 Aug 2019 20:56:11 +0800 > v4: > Add Fixes tag for both patch #1 and #2. I applied v3, sorry. I think it is OK as I will backport things to v5.2 -stable anyways.
Re: [PATCH 08/15] sched,fair: simplify timeslice length code
On Wed, 2019-08-28 at 19:32 +0200, Vincent Guittot wrote: > On Thu, 22 Aug 2019 at 04:18, Rik van Riel wrote: > > The idea behind __sched_period makes sense, but the results do not > > always. > > > > When a CPU has one high priority task and a large number of low > > priority > > tasks, __sched_period will return a value larger than > > sysctl_sched_latency, > > and the one high priority task may end up getting a timeslice all > > for itself > > that is also much larger than sysctl_sched_latency. > > note that unless you enable sched_feat(HRTICK), the sched_slice is > mainly used to decide how fast we preempt running task at tick but a > newly wake up task can preempt it before > > > The low priority tasks will have their time slices rounded up to > > sysctl_sched_min_granularity, resulting in an even larger > > scheduling > > latency than targeted by __sched_period. > > Will this not break the fairness between a always running task and a > short sleeping one with this changes ? In what way? The vruntime for the always running task will continue to advance the same way it always has. > > Simplify the code by simply ripping out __sched_period and always > > taking > > fractions of sysctl_sched_latency. > > > > If a high priority task ends up getting a "too small" time slice > > compared > > to low priority tasks, the vruntime scaling ensures that it will > > simply > > get scheduled more frequently than low priority tasks. > > Will you not increase the number of context switch ? It should actually decrease the number of context switches. If a nice +19 task gets a longer time slice than it would today, its vruntime will be advanced by more than sysctl_sched_latency, and it will not get to run again until another task has caught up with its vruntime. That means the regular (or high) priority task that shares the CPU with that nice +19 task might get several time slices in a row until the nice +19 task gets to run again. What am I overlooking? -- All Rights Reversed. signature.asc Description: This is a digitally signed message part
Re: [PATCH net-next 00/12] net: hns3: add some cleanups and optimizations
On Wed, 28 Aug 2019 22:23:04 +0800, Huazhong Tan wrote: > This patch-set includes cleanups, optimizations and bugfix for > the HNS3 ethernet controller driver. The phy loopback (patch 10) could probably benefit from an expert look but in general LGTM.
Re: [patch V3 1/2] x86/mm/pti: Handle unaligned address gracefully in pti_clone_pagetable()
* Thomas Gleixner wrote: > From: Song Liu > > pti_clone_pmds() assumes that the supplied address is either: > > - properly PUD/PMD aligned > or > - the address is actually mapped which means that independently >of the mapping level (PUD/PMD/PTE) the next higher mapping >exists. > > If that's not the case the unaligned address can be incremented by PUD or > PMD size incorrectly. All callers supply mapped and/or aligned addresses, > but for the sake of robustness it's better to handle that case properly and > to emit a warning. > > [ tglx: Rewrote changelog and added WARN_ON_ONCE() ] > > Signed-off-by: Song Liu > Signed-off-by: Thomas Gleixner > --- > V2: Negate P[UM]D_MASK for checking whether the offset part is 0 > V3: Fix changelog > --- > arch/x86/mm/pti.c |6 -- > 1 file changed, 4 insertions(+), 2 deletions(-) > > --- a/arch/x86/mm/pti.c > +++ b/arch/x86/mm/pti.c > @@ -330,13 +330,15 @@ pti_clone_pgtable(unsigned long start, u > > pud = pud_offset(p4d, addr); > if (pud_none(*pud)) { > - addr += PUD_SIZE; > + WARN_ON_ONCE(addr & ~PUD_MASK); > + addr = round_up(addr + 1, PUD_SIZE); > continue; > } > > pmd = pmd_offset(pud, addr); > if (pmd_none(*pmd)) { > - addr += PMD_SIZE; > + WARN_ON_ONCE(addr & ~PMD_MASK); > + addr = round_up(addr + 1, PMD_SIZE); > continue; > } Reviewed-by: Ingo Molnar Thanks, Ingo
Re: [PATCH v3 3/3] HID: core: fix dmesg flooding if report field larger than 32bit
ping? I'd love to see this get in. with distro kernel I have effectively no dmesg due to this issue On Mon, Aug 12, 2019 at 9:20 AM wrote: > > From: Joshua Clayton > > Only warn once of oversize hid report value field > > On HP spectre x360 convertible the message: > hid-sensor-hub 001F:8087:0AC2.0002: hid_field_extract() called with n (192) > > 32! (kworker/1:2) > is continually printed many times per second, crowding out all else. > Protect dmesg by printing the warning only one time. > > The size of the hid report field data structure should probably be increased. > The data structure is treated as a u32 in Linux, but an unlimited number > of bits in the USB hid spec, so there is some rearchitecture needed now that > devices are sending more than 32 bits. > > Signed-off-by: Joshua Clayton > > diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c > index 210b81a56e1a..3eaee2c37931 100644 > --- a/drivers/hid/hid-core.c > +++ b/drivers/hid/hid-core.c > @@ -1311,8 +1311,8 @@ u32 hid_field_extract(const struct hid_device *hid, u8 > *report, > unsigned offset, unsigned n) > { > if (n > 32) { > - hid_warn(hid, "hid_field_extract() called with n (%d) > 32! > (%s)\n", > -n, current->comm); > + hid_warn_once(hid, "%s() called with n (%d) > 32! (%s)\n", > + __func__, n, current->comm); > n = 32; > } > > -- > 2.21.0 >
Re: [PATCH 2/2] kbuild: allow Clang to find unused static inline functions for W=1 build
On Wed, Aug 28, 2019 at 11:20 AM Nathan Chancellor wrote: > > On Wed, Aug 28, 2019 at 02:54:25PM +0900, Masahiro Yamada wrote: > > GCC and Clang have different policy for -Wunused-function; GCC does not > > warn unused static inline functions at all whereas Clang does if they > > are defined in source files instead of included headers although it has > > been suppressed since commit abb2ea7dfd82 ("compiler, clang: suppress > > warning for unused static inline functions"). > > > > We often miss to delete unused functions where 'static inline' is used > > in *.c files since there is no tool to detect them. Unused code remains > > until somebody notices. For example, commit 075ddd75680f ("regulator: > > core: remove unused rdev_get_supply()"). > > > > Let's remove __maybe_unused from the inline macro to allow Clang to > > start finding unused static inline functions. For now, we do this only > > for W=1 build since it is not a good idea to sprinkle warnings for the > > normal build. > > > > My initial attempt was to add -Wno-unused-function for no W=1 build > > (https://lore.kernel.org/patchwork/patch/1120594/) > > > > Nathan Chancellor pointed out that would weaken Clang's checks since > > we would no longer get -Wunused-function without W=1. It is true GCC > > would detect unused static non-inline functions, but it would weaken > > Clang as a standalone compiler at least. Got it. No problem. > > > > Here is a counter implementation. The current problem is, W=... only > > controls compiler flags, which are globally effective. There is no way > > to narrow the scope to only 'static inline' functions. > > > > This commit defines KBUILD_EXTRA_WARN[123] corresponding to W=[123]. > > When KBUILD_EXTRA_WARN1 is defined, __maybe_unused is omitted from > > the 'inline' macro. > > > > This makes the code a bit uglier, so personally I do not want to carry > > this forever. If we can manage to fix most of the warnings, we can > > drop this entirely, then enable -Wunused-function all the time. How many warnings? > > > > If you contribute to code clean-up, please run "make CC=clang W=1" > > and check -Wunused-function warnings. You will find lots of unused > > functions. > > > > Some of them are false-positives because the call-sites are disabled > > by #ifdef. I do not like to abuse the inline keyword for suppressing > > unused-function warnings because it is intended to be a hint for the > > compiler optimization. I prefer #ifdef around the definition, or > > __maybe_unused if #ifdef would make the code too ugly. > > > > Signed-off-by: Masahiro Yamada > > I can still see warnings from static unused functions and with W=1, I > see plenty more. I agree that this is uglier because of the > __inline_maybe_unused but I think this is better for regular developers. > I will try to work on these unused-function warnings! How many are we talking here? > > Reviewed-by: Nathan Chancellor > Tested-by: Nathan Chancellor This is getting kind of messy. I was more ok when the goal seemed to be simplifying the definition of `inline`, but this is worse IMO. -- Thanks, ~Nick Desaulniers
Re: [PATCH] PCI: hv: Make functions only used locally static in pci-hyperv.c
Hello Bjorn, Thank you for feedback. [...] Maybe just: PCI: hv: Make functions static since we already know it's in pci-hyperv.c, and it's obvious that you can only do this for functions that are only used locally. Makes sense. I will update the subject line in v3. [...] Rewrap commit log to fill 75 columns. Sorry about this. I wasn't sure if one also should wrap compiler and/or checkpatch.pl script warnings or errors. I will fix this. Does this fix all the similar warnings in drivers/pci/? If there are more, maybe we could fix them all in a single patch or at least a single series? At the moment, while compiling the drivers/pci the compiler yields a similar warning for the following files: arch/x86/pci/xen.c arch/x86/pci/numachip.c drivers/pci/controller/pci-hyperv.c drivers/pci/vc.c I will have a look at each closer. Krzysztof
Re: [PATCH] cpuidle-haltpoll: Enable kvm guest polling when dedicated physical CPUs are available
On Wed, Aug 28, 2019 at 4:39 PM Marcelo Tosatti wrote: > > On Wed, Aug 28, 2019 at 10:45:44AM +0200, Rafael J. Wysocki wrote: > > On Wed, Aug 28, 2019 at 10:34 AM Wanpeng Li wrote: > > > > > > On Tue, 27 Aug 2019 at 08:43, Wanpeng Li wrote: > > > > > > > > Cc Michael S. Tsirkin, > > > > On Tue, 27 Aug 2019 at 04:42, Marcelo Tosatti > > > > wrote: > > > > > > > > > > On Tue, Aug 13, 2019 at 08:55:29AM +0800, Wanpeng Li wrote: > > > > > > On Sun, 4 Aug 2019 at 04:21, Marcelo Tosatti > > > > > > wrote: > > > > > > > > > > > > > > On Thu, Aug 01, 2019 at 06:54:49PM +0200, Paolo Bonzini wrote: > > > > > > > > On 01/08/19 18:51, Rafael J. Wysocki wrote: > > > > > > > > > On 8/1/2019 9:06 AM, Wanpeng Li wrote: > > > > > > > > >> From: Wanpeng Li > > > > > > > > >> > > > > > > > > >> The downside of guest side polling is that polling is > > > > > > > > >> performed even > > > > > > > > >> with other runnable tasks in the host. However, even if poll > > > > > > > > >> in kvm > > > > > > > > >> can aware whether or not other runnable tasks in the same > > > > > > > > >> pCPU, it > > > > > > > > >> can still incur extra overhead in over-subscribe scenario. > > > > > > > > >> Now we can > > > > > > > > >> just enable guest polling when dedicated pCPUs are available. > > > > > > > > >> > > > > > > > > >> Cc: Rafael J. Wysocki > > > > > > > > >> Cc: Paolo Bonzini > > > > > > > > >> Cc: Radim Krčmář > > > > > > > > >> Cc: Marcelo Tosatti > > > > > > > > >> Signed-off-by: Wanpeng Li > > > > > > > > > > > > > > > > > > Paolo, Marcelo, any comments? > > > > > > > > > > > > > > > > Yes, it's a good idea. > > > > > > > > > > > > > > > > Acked-by: Paolo Bonzini > > > > > > Hi Marcelo, > > > > > > If you don't have more concern, I guess Rafael can apply this patch > > > now since the merge window is not too far. > > > > I will likely queue it up later today and it will go to linux-next > > early next week. > > > > Thanks! > > NACK patch. I got an ACK from Paolo on it, though. Convince Paolo to withdraw his ACK if you want it to not be applied. > Just don't load the haltpoll driver. And why would that be better?
Re: [PATCH v1] sefltest/ima: support appended signatures (modsig)
Hello Mimi, Mimi Zohar writes: > In addition to the PE/COFF and IMA xattr signatures, the kexec kernel > image can be signed with an appended signature, using the same > scripts/sign-file tool that is used to sign kernel modules. > > This patch adds support for detecting a kernel image signed with an > appended signature and updates the existing test messages > appropriately. > > Reviewed-by: Petr Vorel > Signed-off-by: Mimi Zohar Thanks for doing this! Reviewed-by: Thiago Jung Bauermann -- Thiago Jung Bauermann IBM Linux Technology Center
Re: [PATCH v1] cpuidle-haltpoll: vcpu hotplug support
On Wed, Aug 28, 2019 at 8:58 PM Joao Martins wrote: > > When cpus != maxcpus cpuidle-haltpoll will fail to register all vcpus > past the online ones and thus fail to register the idle driver. > This is because cpuidle_add_sysfs() will return with -ENODEV as a > consequence from get_cpu_device() return no device for a non-existing > CPU. > > Instead switch to cpuidle_register_driver() and manually register each > of the present cpus through cpuhp_setup_state() and future ones that > get onlined. This mimics similar logic as intel_idle. > > Fixes: fa86ee90eb11 ("add cpuidle-haltpoll driver") > Signed-off-by: Joao Martins > Signed-off-by: Boris Ostrovsky Paolo, what do you think? > --- > arch/x86/include/asm/cpuidle_haltpoll.h | 4 +- > arch/x86/kernel/kvm.c | 18 +++ > drivers/cpuidle/cpuidle-haltpoll.c | 65 +++-- > include/linux/cpuidle_haltpoll.h| 4 +- > 4 files changed, 70 insertions(+), 21 deletions(-) > > diff --git a/arch/x86/include/asm/cpuidle_haltpoll.h > b/arch/x86/include/asm/cpuidle_haltpoll.h > index ff8607d81526..c8b39c6716ff 100644 > --- a/arch/x86/include/asm/cpuidle_haltpoll.h > +++ b/arch/x86/include/asm/cpuidle_haltpoll.h > @@ -2,7 +2,7 @@ > #ifndef _ARCH_HALTPOLL_H > #define _ARCH_HALTPOLL_H > > -void arch_haltpoll_enable(void); > -void arch_haltpoll_disable(void); > +void arch_haltpoll_enable(unsigned int cpu); > +void arch_haltpoll_disable(unsigned int cpu); > > #endif > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c > index 8d150e3732d9..a9b6c4e2446d 100644 > --- a/arch/x86/kernel/kvm.c > +++ b/arch/x86/kernel/kvm.c > @@ -880,32 +880,26 @@ static void kvm_enable_host_haltpoll(void *i) > wrmsrl(MSR_KVM_POLL_CONTROL, 1); > } > > -void arch_haltpoll_enable(void) > +void arch_haltpoll_enable(unsigned int cpu) > { > if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) { > - printk(KERN_ERR "kvm: host does not support poll control\n"); > - printk(KERN_ERR "kvm: host upgrade recommended\n"); > + pr_err_once("kvm: host does not support poll control\n"); > + pr_err_once("kvm: host upgrade recommended\n"); > return; > } > > - preempt_disable(); > /* Enable guest halt poll disables host halt poll */ > - kvm_disable_host_haltpoll(NULL); > - smp_call_function(kvm_disable_host_haltpoll, NULL, 1); > - preempt_enable(); > + smp_call_function_single(cpu, kvm_disable_host_haltpoll, NULL, 1); > } > EXPORT_SYMBOL_GPL(arch_haltpoll_enable); > > -void arch_haltpoll_disable(void) > +void arch_haltpoll_disable(unsigned int cpu) > { > if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) > return; > > - preempt_disable(); > /* Enable guest halt poll disables host halt poll */ > - kvm_enable_host_haltpoll(NULL); > - smp_call_function(kvm_enable_host_haltpoll, NULL, 1); > - preempt_enable(); > + smp_call_function_single(cpu, kvm_enable_host_haltpoll, NULL, 1); > } > EXPORT_SYMBOL_GPL(arch_haltpoll_disable); > #endif > diff --git a/drivers/cpuidle/cpuidle-haltpoll.c > b/drivers/cpuidle/cpuidle-haltpoll.c > index 9ac093dcbb01..0d1853a7185e 100644 > --- a/drivers/cpuidle/cpuidle-haltpoll.c > +++ b/drivers/cpuidle/cpuidle-haltpoll.c > @@ -11,12 +11,15 @@ > */ > > #include > +#include > #include > #include > #include > #include > #include > > +static struct cpuidle_device __percpu *haltpoll_cpuidle_devices; > + > static int default_enter_idle(struct cpuidle_device *dev, > struct cpuidle_driver *drv, int index) > { > @@ -46,6 +49,48 @@ static struct cpuidle_driver haltpoll_driver = { > .state_count = 2, > }; > > +static int haltpoll_cpu_online(unsigned int cpu) > +{ > + struct cpuidle_device *dev; > + > + dev = per_cpu_ptr(haltpoll_cpuidle_devices, cpu); > + if (!dev->registered) { > + dev->cpu = cpu; > + if (cpuidle_register_device(dev)) { > + pr_notice("cpuidle_register_device %d failed!\n", > cpu); > + return -EIO; > + } > + arch_haltpoll_enable(cpu); > + } > + > + return 0; > +} > + > +static void haltpoll_uninit(void) > +{ > + unsigned int cpu; > + > + cpus_read_lock(); > + > + for_each_online_cpu(cpu) { > + struct cpuidle_device *dev = > + per_cpu_ptr(haltpoll_cpuidle_devices, cpu); > + > + if (!dev->registered) > + continue; > + > + arch_haltpoll_disable(cpu); > + cpuidle_unregister_device(dev); > + } > + > + cpuidle_unregister(&haltpoll_driver); > + > + free_percpu(haltpoll_cpuidle_devices); > + haltpoll_cpuidle_devices = NULL; > + > + cpus_read_unlock(); > +} > + > static int __init haltpoll_init(voi
Re: [PATCH v2] riscv: add support for SECCOMP and SECCOMP_FILTER
On Wed, Aug 28, 2019 at 02:37:34PM -0700, David Abdurachmanov wrote: > --disk path=$PWD/disk \ > --boot kernel=$PWD/${FIRMWARE} \ This is where I tripped over things. How do I specify the kernel to boot from OUTSIDE the disk image? -- Kees Cook
[PATCH][cifs-next] cifs: ensure variable rc is initialized at the after_open label
From: Colin Ian King A previous fix added a jump to after_open which now leaves variable rc in a uninitialized state. A couple of the cases in the following switch statement do not set variable rc, hence the error check on rc at the end of the switch statement is reading a garbage value in rc for those specific cases. Fix this by initializing rc to zero before the switch statement. Fixes: 955a9c5b39379 ("cifs: create a helper to find a writeable handle by path name") Addresses-Coverity: ("Uninitialized scalar variable") Signed-off-by: Colin Ian King --- fs/cifs/smb2inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 70342bcd89b4..939fc7b2234c 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -116,6 +116,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, smb2_set_next_command(tcon, &rqst[num_rqst]); after_open: num_rqst++; + rc = 0; /* Operation */ switch (command) { -- 2.20.1
[PATCH] selftests/bpf: Fix a typo in test_offload.py
This patch fix a spelling typo in test_offload.py Signed-off-by: Masanari Iida --- tools/testing/selftests/bpf/test_offload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 425f9ed27c3b..15a666329a34 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -1353,7 +1353,7 @@ try: bpftool_prog_list_wait(expected=1) ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"] -fail(ifnameB != simB1['ifname'], "program not bound to originial device") +fail(ifnameB != simB1['ifname'], "program not bound to original device") simB1.remove() bpftool_prog_list_wait(expected=1) -- 2.23.0.37.g745f6812895b
Re: [PATCH v1] sefltest/ima: support appended signatures (modsig)
On Wed, 2019-08-28 at 20:38 -0300, Thiago Jung Bauermann wrote: > Hello Mimi, > > Mimi Zohar writes: > > > In addition to the PE/COFF and IMA xattr signatures, the kexec kernel > > image can be signed with an appended signature, using the same > > scripts/sign-file tool that is used to sign kernel modules. > > > > This patch adds support for detecting a kernel image signed with an > > appended signature and updates the existing test messages > > appropriately. > > > > Reviewed-by: Petr Vorel > > Signed-off-by: Mimi Zohar > > Thanks for doing this! You're welcome. This isn't in lieu of a proper regression test that verifies the IMA measurement list template modsig and d-modsig data fields. That still needs to be written. thanks, Mimi
Re: [PATCH 01/19] dax: remove block device dependencies
On Wed, Aug 28, 2019 at 3:53 PM Dave Chinner wrote: > > On Wed, Aug 28, 2019 at 01:58:43PM -0400, Vivek Goyal wrote: > > On Tue, Aug 27, 2019 at 11:58:09PM -0700, Christoph Hellwig wrote: > > > On Tue, Aug 27, 2019 at 12:38:28PM -0400, Vivek Goyal wrote: > > > > > For bdev_dax_pgoff > > > > > I'd much rather have the partition offset if there is on in the daxdev > > > > > somehow so that we can get rid of the block device entirely. > > > > > > > > IIUC, there is one block_device per partition while there is only one > > > > dax_device for the whole disk. So we can't directly move bdev logical > > > > offset into dax_device. > > > > > > Well, then we need to find a way to get partitions for dax devices, > > > as we really should not expect a block device hiding behind a dax > > > dev. That is just a weird legacy assumption - block device need to > > > layer on top of the dax device optionally. > > > > > > > > > > > We probably could put this in "iomap" and leave it to filesystems to > > > > report offset into dax_dev in iomap that way dax generic code does not > > > > have to deal with it. But that probably will be a bigger change. > > > > > > And where would the file system get that information from? > > > > File system knows about block device, can it just call get_start_sect() > > while filling iomap->addr. And this means we don't have to have > > parition information in dax device. Will something like following work? > > (Just a proof of concept patch). > > > > > > --- > > drivers/dax/super.c | 11 +++ > > fs/dax.c|6 +++--- > > fs/ext4/inode.c |6 +- > > include/linux/dax.h |1 + > > 4 files changed, 20 insertions(+), 4 deletions(-) > > > > Index: rhvgoyal-linux/fs/ext4/inode.c > > === > > --- rhvgoyal-linux.orig/fs/ext4/inode.c 2019-08-28 13:51:16.051937204 > > -0400 > > +++ rhvgoyal-linux/fs/ext4/inode.c2019-08-28 13:51:44.453937204 -0400 > > @@ -3589,7 +3589,11 @@ retry: > > WARN_ON_ONCE(1); > > return -EIO; > > } > > - iomap->addr = (u64)map.m_pblk << blkbits; > > + if (IS_DAX(inode)) > > + iomap->addr = ((u64)map.m_pblk << blkbits) + > > + (get_start_sect(iomap->bdev) * 512); > > + else > > + iomap->addr = (u64)map.m_pblk << blkbits; > > I'm not a fan of returning a physical device sector address from an > interface where ever other user/caller expects this address to be a > logical block address into the block device. It creates a landmine > in the iomap API that callers may not be aware of and that's going > to cause bugs. We're trying really hard to keep special case hacks > like this out of the iomap infrastructure, so on those grounds alone > I'd suggest this is a dead end approach. > > Hence I think that if the dax device needs a physical offset from > the start of the block device the filesystem sits on, it should be > set up at dax device instantiation time and so the filesystem/bdev > never needs to be queried again for this information. > Agree. In retrospect it was my laziness in the dax-device implementation to expect the block-device to be available. It looks like fs_dax_get_by_bdev() is an intercept point where a dax_device could be dynamically created to represent the subset range indicated by the block-device partition. That would open up more cleanup opportunities.
Re: [PATCH 2/2] kbuild: allow Clang to find unused static inline functions for W=1 build
On Wed, Aug 28, 2019 at 04:28:30PM -0700, Nick Desaulniers wrote: > On Wed, Aug 28, 2019 at 11:20 AM Nathan Chancellor > wrote: > > > > On Wed, Aug 28, 2019 at 02:54:25PM +0900, Masahiro Yamada wrote: > > > GCC and Clang have different policy for -Wunused-function; GCC does not > > > warn unused static inline functions at all whereas Clang does if they > > > are defined in source files instead of included headers although it has > > > been suppressed since commit abb2ea7dfd82 ("compiler, clang: suppress > > > warning for unused static inline functions"). > > > > > > We often miss to delete unused functions where 'static inline' is used > > > in *.c files since there is no tool to detect them. Unused code remains > > > until somebody notices. For example, commit 075ddd75680f ("regulator: > > > core: remove unused rdev_get_supply()"). > > > > > > Let's remove __maybe_unused from the inline macro to allow Clang to > > > start finding unused static inline functions. For now, we do this only > > > for W=1 build since it is not a good idea to sprinkle warnings for the > > > normal build. > > > > > > My initial attempt was to add -Wno-unused-function for no W=1 build > > > (https://lore.kernel.org/patchwork/patch/1120594/) > > > > > > Nathan Chancellor pointed out that would weaken Clang's checks since > > > we would no longer get -Wunused-function without W=1. It is true GCC > > > would detect unused static non-inline functions, but it would weaken > > > Clang as a standalone compiler at least. > > Got it. No problem. > > > > > > > Here is a counter implementation. The current problem is, W=... only > > > controls compiler flags, which are globally effective. There is no way > > > to narrow the scope to only 'static inline' functions. > > > > > > This commit defines KBUILD_EXTRA_WARN[123] corresponding to W=[123]. > > > When KBUILD_EXTRA_WARN1 is defined, __maybe_unused is omitted from > > > the 'inline' macro. > > > > > > This makes the code a bit uglier, so personally I do not want to carry > > > this forever. If we can manage to fix most of the warnings, we can > > > drop this entirely, then enable -Wunused-function all the time. > > How many warnings? In an x86 defconfig build (one of the smallest builds we do), I see an additional 35 warnings that crop up: https://gist.github.com/003ba86ba60b4ac7e8109089d6cb1a5a > > > > > > If you contribute to code clean-up, please run "make CC=clang W=1" > > > and check -Wunused-function warnings. You will find lots of unused > > > functions. > > > > > > Some of them are false-positives because the call-sites are disabled > > > by #ifdef. I do not like to abuse the inline keyword for suppressing > > > unused-function warnings because it is intended to be a hint for the > > > compiler optimization. I prefer #ifdef around the definition, or > > > __maybe_unused if #ifdef would make the code too ugly. > > > > > > Signed-off-by: Masahiro Yamada > > > > I can still see warnings from static unused functions and with W=1, I > > see plenty more. I agree that this is uglier because of the > > __inline_maybe_unused but I think this is better for regular developers. > > I will try to work on these unused-function warnings! > > How many are we talking here? > > > > > Reviewed-by: Nathan Chancellor > > Tested-by: Nathan Chancellor > > This is getting kind of messy. I was more ok when the goal seemed to > be simplifying the definition of `inline`, but this is worse IMO. I guess if you want, we can just go back to v1 and have all unused function warnings hidden by default with clang. Fixing these warnings will take a significant amount of time given there will probably be a few hundred so I don't think having this warning hidden behind W=1 for that long is a good thing. Cheers, Nathan
RE: [PATCH v2] PCI: hv: Make functions only used locally static in pci-hyperv.c
Hello Haiyang, Thank you for feedback. [...] The second line should be aligned next to the "(" on the first line. Also the first line is now over 80 chars. Sorry about this. I will fix it in v3. Thank you for pointing this out. To address both the alignment and line length of hv_register_block_invalidate(), I took a hint from the way how hv_compose_msi_req_v1() is current formatted. I hope that this would be acceptable. Krzysztof
Re: [PATCH] PCI: hotplug: Remove surplus return from a void function
On 8/26/19 2:51 AM, Krzysztof Wilczynski wrote: > Remove unnecessary empty return statement at the end of a void > function in the following: > > - drivers/pci/hotplug/cpci_hotplug_core.c: cleanup_slots() > - drivers/pci/hotplug/cpqphp_core.c: pci_print_IRQ_route() > - drivers/pci/hotplug/cpqphp_ctrl.c: cpqhp_pushbutton_thread() > - drivers/pci/hotplug/cpqphp_ctrl.c: interrupt_event_handler() > - drivers/pci/hotplug/cpqphp_nvram.h: compaq_nvram_init() > - drivers/pci/hotplug/rpadlpar_core.c: rpadlpar_io_init() > - drivers/pci/hotplug/rpaphp_core.c: cleanup_slots() > > Signed-off-by: Krzysztof Wilczynski For rpa*_core.c portions, Acked-by: Tyrel Datwyler > --- > drivers/pci/hotplug/cpci_hotplug_core.c | 1 - > drivers/pci/hotplug/cpqphp_core.c | 1 - > drivers/pci/hotplug/cpqphp_ctrl.c | 4 > drivers/pci/hotplug/cpqphp_nvram.h | 5 + > drivers/pci/hotplug/rpadlpar_core.c | 1 - > drivers/pci/hotplug/rpaphp_core.c | 1 - > 6 files changed, 1 insertion(+), 12 deletions(-) > > diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c > b/drivers/pci/hotplug/cpci_hotplug_core.c > index 603eadf3d965..d0559d2faf50 100644 > --- a/drivers/pci/hotplug/cpci_hotplug_core.c > +++ b/drivers/pci/hotplug/cpci_hotplug_core.c > @@ -563,7 +563,6 @@ cleanup_slots(void) > } > cleanup_null: > up_write(&list_rwsem); > - return; > } > > int > diff --git a/drivers/pci/hotplug/cpqphp_core.c > b/drivers/pci/hotplug/cpqphp_core.c > index 16bbb183695a..b8aacb41a83c 100644 > --- a/drivers/pci/hotplug/cpqphp_core.c > +++ b/drivers/pci/hotplug/cpqphp_core.c > @@ -173,7 +173,6 @@ static void pci_print_IRQ_route(void) > dbg("%d %d %d %d\n", tbus, tdevice >> 3, tdevice & 0x7, tslot); > > } > - return; > } > > > diff --git a/drivers/pci/hotplug/cpqphp_ctrl.c > b/drivers/pci/hotplug/cpqphp_ctrl.c > index b7f4e1f099d9..68de958a9be8 100644 > --- a/drivers/pci/hotplug/cpqphp_ctrl.c > +++ b/drivers/pci/hotplug/cpqphp_ctrl.c > @@ -1872,8 +1872,6 @@ static void interrupt_event_handler(struct controller > *ctrl) > } > } /* End of FOR loop */ > } > - > - return; > } > > > @@ -1943,8 +1941,6 @@ void cpqhp_pushbutton_thread(struct timer_list *t) > > p_slot->state = STATIC_STATE; > } > - > - return; > } > > > diff --git a/drivers/pci/hotplug/cpqphp_nvram.h > b/drivers/pci/hotplug/cpqphp_nvram.h > index 918ff8dbfe62..70e879b6a23f 100644 > --- a/drivers/pci/hotplug/cpqphp_nvram.h > +++ b/drivers/pci/hotplug/cpqphp_nvram.h > @@ -16,10 +16,7 @@ > > #ifndef CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM > > -static inline void compaq_nvram_init(void __iomem *rom_start) > -{ > - return; > -} > +static inline void compaq_nvram_init(void __iomem *rom_start) { } > > static inline int compaq_nvram_load(void __iomem *rom_start, struct > controller *ctrl) > { > diff --git a/drivers/pci/hotplug/rpadlpar_core.c > b/drivers/pci/hotplug/rpadlpar_core.c > index 182f9e3443ee..977946e4e613 100644 > --- a/drivers/pci/hotplug/rpadlpar_core.c > +++ b/drivers/pci/hotplug/rpadlpar_core.c > @@ -473,7 +473,6 @@ int __init rpadlpar_io_init(void) > void rpadlpar_io_exit(void) > { > dlpar_sysfs_exit(); > - return; > } > > module_init(rpadlpar_io_init); > diff --git a/drivers/pci/hotplug/rpaphp_core.c > b/drivers/pci/hotplug/rpaphp_core.c > index c3899ee1db99..18627bb21e9e 100644 > --- a/drivers/pci/hotplug/rpaphp_core.c > +++ b/drivers/pci/hotplug/rpaphp_core.c > @@ -408,7 +408,6 @@ static void __exit cleanup_slots(void) > pci_hp_deregister(&slot->hotplug_slot); > dealloc_slot_struct(slot); > } > - return; > } > > static int __init rpaphp_init(void) >
Re: [PATCH] selftests/bpf: Fix a typo in test_offload.py
On Thu, 29 Aug 2019 09:01:30 +0900, Masanari Iida wrote: > This patch fix a spelling typo in test_offload.py > > Signed-off-by: Masanari Iida Acked-by: Jakub Kicinski
Re: objtool warning "uses BP as a scratch register" with clang-9
On Wed, Aug 28, 2019 at 03:13:21PM -0700, Nick Desaulniers wrote: > On Tue, Aug 27, 2019 at 12:22 PM Josh Poimboeuf wrote: > > > > On Tue, Aug 27, 2019 at 09:00:52PM +0200, Arnd Bergmann wrote: > > > On Tue, Aug 27, 2019 at 5:00 PM Ilie Halip wrote: > > > > > > > > > > $ clang-9 -c crc32.i -O2 ; objtool check crc32.o > > > > > > crc32.o: warning: objtool: fn1 uses BP as a scratch register > > > > > > > > Yes, I see it too. https://godbolt.org/z/N56HW1 > > > > > > > > > Do you still see this warning with -fno-omit-frame-pointer (assuming > > > > > clang has that option)? > > > > > > > > Using this makes the warning go away. Running objtool with --no-fp > > > > also gets rid of it. > > > > > > I still see the warning after adding back the -fno-omit-frame-pointer > > > in my reduced test case: > > > > > > $ clang-9 -c crc32.i -Werror -Wno-address-of-packed-member -Wall > > > -Wno-pointer-sign -Wno-unused-value -Wno-constant-logical-operand -O2 > > > -Wno-unused -fno-omit-frame-pointer > > > $ objtool check crc32.o > > > crc32.o: warning: objtool: fn1 uses BP as a scratch register > > > > This warning most likely means that clang is clobbering RBP in leaf > > functions. With -fno-omit-frame-pointer, leaf functions don't need to > > set up the frame pointer, but they at least need to leave RBP untouched, > > so that an interrupts/exceptions can unwind through the function. > > It sounds like clang has `-mno-omit-leaf-frame-pointer` (via > https://bugs.llvm.org/show_bug.cgi?id=43128#c6). Arnd, can you give > that a shot? I think Arnd already confirmed that -mno-omit-leaf-frame-pointer fixed it in an earlier email. -- Josh
Re: [PATCH v3 0/6] hugetlb_cgroup: Add hugetlb_cgroup reservation limits
On Wed, Aug 28, 2019 at 4:23 AM Michal Hocko wrote: > > On Mon 26-08-19 16:32:34, Mina Almasry wrote: > > mm/hugetlb.c | 493 -- > > mm/hugetlb_cgroup.c | 187 +-- > > This is a lot of changes to an already subtle code which hugetlb > reservations undoubly are. Moreover cgroupv1 is feature frozen and I am > not aware of any plans to port the controller to v2. That all doesn't > sound in favor of this change. Actually "no plan to port the controller to v2" makes the case strong for these changes (and other new features) to be done in v1. If there is an alternative solution in v2 then I can understand the push-back on changes in v1 but that is not the case here. Shakeel
[PATCH] ubifs: super: Use struct_size() helper
One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct ubifs_znode { ... struct ubifs_zbranch zbranch[]; }; Make use of the struct_size() helper instead of an open-coded version in order to avoid any potential type mistakes. So, replace the following form: sizeof(struct ubifs_znode) + c->fanout * sizeof(struct ubifs_zbranch) with: struct_size(c->cnext, zbranch, c->fanout) This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva --- fs/ubifs/super.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 2706f13e3eb9..ca86489048c8 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -661,8 +661,7 @@ static int init_constants_sb(struct ubifs_info *c) long long tmp64; c->main_bytes = (long long)c->main_lebs * c->leb_size; - c->max_znode_sz = sizeof(struct ubifs_znode) + - c->fanout * sizeof(struct ubifs_zbranch); + c->max_znode_sz = struct_size(c->cnext, zbranch, c->fanout); tmp = ubifs_idx_node_sz(c, 1); c->ranges[UBIFS_IDX_NODE].min_len = tmp; -- 2.23.0
Re: [PATCH 0/1] Fix race in ipmi timer cleanup
On 8/28/19 6:32 PM, Corey Minyard wrote: > On Wed, Aug 28, 2019 at 04:36:24PM -0400, Jes Sorensen wrote: >> From: Jes Sorensen >> >> I came across this in 4.16, but I believe the bug is still present >> in current 5.x, even if it is less likely to trigger. >> >> Basially stop_timer_and_thread() only calls del_timer_sync() if >> timer_running == true. However smi_mod_timer enables the timer before >> setting timer_running = true. > > All the modifications/checks for timer_running should be done under > the si_lock. It looks like a lock is missing in shutdown_smi(), > probably starting before setting interrupt_disabled to true and > after stop_timer_and_thread. I think that is the right fix for > this problem. Hi Corey, I agree a spin lock could deal with this specific issue too, but calling del_timer_sync() is safe to call on an already disabled timer. The whole flagging of timer_running really doesn't make much sense in the first place either. As for interrupt_disabled that is even worse. There's multiple places in the code where interrupt_disabled is checked, some of them are not protected by a spin lock, including shutdown_smi() where you have this sequence: while (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL)){ poll(smi_info); schedule_timeout_uninterruptible(1); } if (smi_info->handlers) disable_si_irq(smi_info); while (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL)){ poll(smi_info); schedule_timeout_uninterruptible(1); } In this case you'll have to drop and retake the long several times. You also have this call sequence which leads to disable_si_irq() which checks interrupt_disabled: flush_messages() smi_event_handler() handle_transaction_done() handle_flags() alloc_msg_handle_irq() disable_si_irq() {disable,enable}_si_irq() themselves are racy: static inline bool disable_si_irq(struct smi_info *smi_info) { if ((smi_info->io.irq) && (!smi_info->interrupt_disabled)) { smi_info->interrupt_disabled = true; Basically interrupt_disabled need to be atomic here to have any value, unless you ensure to have a spin lock around every access to it. Cheers, Jes
RE: [PATCH net v4 0/2] r8152: fix side effect
David Miller [mailto:da...@davemloft.net] > Sent: Thursday, August 29, 2019 7:18 AM [...] > > v4: > > Add Fixes tag for both patch #1 and #2. > > I applied v3, sorry. > > I think it is OK as I will backport things to v5.2 -stable anyways. Thanks. Best Regards, Hayes
linux-next: manual merge of the vfs tree with the ceph tree
Hi all, Today's linux-next merge of the vfs tree got a conflict in: fs/ceph/super.c between commit: 8e4133936f30 ("ceph: auto reconnect after blacklisted") from the ceph tree and commit: d91c9998290b ("vfs: Convert ceph to use the new mount API") from the vfs tree. I fixed it up (Thanks to Al for the supplied merge resolution, see below) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. -- Cheers, Stephen Rothwell diff --cc fs/ceph/super.c index 03b63b1cd32c,6be1e75e70c5.. --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@@ -138,266 -139,269 +139,279 @@@ enum Opt_readdir_max_entries, Opt_readdir_max_bytes, Opt_congestion_kb, - Opt_last_int, - /* int args above */ Opt_snapdirname, Opt_mds_namespace, - Opt_fscache_uniq, + Opt_recover_session, - Opt_last_string, - /* string args above */ Opt_dirstat, - Opt_nodirstat, Opt_rbytes, - Opt_norbytes, Opt_asyncreaddir, - Opt_noasyncreaddir, Opt_dcache, - Opt_nodcache, Opt_ino32, - Opt_noino32, Opt_fscache, - Opt_nofscache, Opt_poolperm, - Opt_nopoolperm, Opt_require_active_mds, - Opt_norequire_active_mds, - #ifdef CONFIG_CEPH_FS_POSIX_ACL Opt_acl, - #endif - Opt_noacl, Opt_quotadf, - Opt_noquotadf, Opt_copyfrom, - Opt_nocopyfrom, + Opt_source, }; - static match_table_t fsopt_tokens = { - {Opt_wsize, "wsize=%d"}, - {Opt_rsize, "rsize=%d"}, - {Opt_rasize, "rasize=%d"}, - {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, - {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, - {Opt_caps_max, "caps_max=%d"}, - {Opt_readdir_max_entries, "readdir_max_entries=%d"}, - {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, - {Opt_congestion_kb, "write_congestion_kb=%d"}, - /* int args above */ - {Opt_snapdirname, "snapdirname=%s"}, - {Opt_mds_namespace, "mds_namespace=%s"}, - {Opt_recover_session, "recover_session=%s"}, - {Opt_fscache_uniq, "fsc=%s"}, - /* string args above */ - {Opt_dirstat, "dirstat"}, - {Opt_nodirstat, "nodirstat"}, - {Opt_rbytes, "rbytes"}, - {Opt_norbytes, "norbytes"}, - {Opt_asyncreaddir, "asyncreaddir"}, - {Opt_noasyncreaddir, "noasyncreaddir"}, - {Opt_dcache, "dcache"}, - {Opt_nodcache, "nodcache"}, - {Opt_ino32, "ino32"}, - {Opt_noino32, "noino32"}, - {Opt_fscache, "fsc"}, - {Opt_nofscache, "nofsc"}, - {Opt_poolperm, "poolperm"}, - {Opt_nopoolperm, "nopoolperm"}, - {Opt_require_active_mds, "require_active_mds"}, - {Opt_norequire_active_mds, "norequire_active_mds"}, - #ifdef CONFIG_CEPH_FS_POSIX_ACL - {Opt_acl, "acl"}, - #endif - {Opt_noacl, "noacl"}, - {Opt_quotadf, "quotadf"}, - {Opt_noquotadf, "noquotadf"}, - {Opt_copyfrom, "copyfrom"}, - {Opt_nocopyfrom, "nocopyfrom"}, - {-1, NULL} + static const struct fs_parameter_spec ceph_param_specs[] = { + fsparam_flag_no ("acl", Opt_acl), + fsparam_flag_no ("asyncreaddir",Opt_asyncreaddir), + fsparam_u32 ("caps_max",Opt_caps_max), + fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max), + fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min), + fsparam_s32 ("write_congestion_kb", Opt_congestion_kb), + fsparam_flag_no ("copyfrom",Opt_copyfrom), + fsparam_flag_no ("dcache", Opt_dcache), + fsparam_flag_no ("dirstat", Opt_dirstat), + __fsparam (fs_param_is_string, "fsc", Opt_fscache, +fs_param_neg_with_no | fs_param_v_optional), + fsparam_flag_no ("ino32", Opt_ino32), + fsparam_string ("mds_namespace", Opt_mds_namespace), ++ fsparam_string ("recover_session", Opt_recover_session), + fsparam_flag_no ("poolperm",Opt_poolperm), + fsparam_flag_no ("quotadf", Opt_quotadf), + fsparam_u32 ("rasize", Opt_rasize), + fsparam_flag_no ("rbytes", Opt_rbytes), + fsparam_s32 ("readdir_max_bytes", Opt_readdir_max_bytes), + fsparam_s32 ("readdir_max_entries", Opt_readdir_max_entries), + fsparam_flag_no ("require_active_mds", Opt_require_active_mds), +
Re: [PATCH v2] riscv: add support for SECCOMP and SECCOMP_FILTER
Hi Kees, On Mon, 26 Aug 2019, Kees Cook wrote: > On Mon, Aug 26, 2019 at 09:39:50AM -0700, David Abdurachmanov wrote: > > I don't have the a build with SECCOMP for the board right now, so it > > will have to wait. I just finished a new kernel (almost rc6) for Fedora, > > FWIW, I don't think this should block landing the code: all the tests > fail without seccomp support. ;) So this patch is an improvement! Am sympathetic to this -- we did it with the hugetlb patches for RISC-V -- but it would be good to understand a little bit more about why the test fails before we merge it. Once we merge the patch, it will probably reduce the motivation for others to either understand and fix the underlying problem with the RISC-V code -- or, if it truly is a flaky test, to drop (or fix) the test in the seccomp_bpf kselftests. Thanks for helping to take a closer look at this, - Paul
Re: [PATCH 0/4] KVM: selftests: Introduce VM_MODE_PXXV48_4K
On Wed, Aug 28, 2019 at 01:52:30PM +0200, Andrew Jones wrote: > On Wed, Aug 28, 2019 at 01:51:06PM +0200, Andrew Jones wrote: > > On Tue, Aug 27, 2019 at 09:10:11PM +0800, Peter Xu wrote: > > > The work is based on Thomas's s390 port for dirty_log_test. [1] > > > > > > This series originates from "[PATCH] KVM: selftests: Detect max PA > > > width from cpuid" [1] and one of Drew's comments - instead of keeping > > > the hackish line to overwrite guest_pa_bits all the time, this series > > > introduced the new mode VM_MODE_PXXV48_4K for x86_64 platform. > > > > > > The major issue is that even all the x86_64 kvm selftests are > > > currently using the guest mode VM_MODE_P52V48_4K, many x86_64 hosts > > > are not using 52 bits PA (and in most cases, far less). If with luck > > > we could be having 48 bits hosts, but it's more adhoc (I've observed 3 > > > x86_64 systems, they are having different PA width of 36, 39, 48). I > > > am not sure whether this is happening to the other archs as well, but > > > it probably makes sense to bring the x86_64 tests to the real world on > > > always using the correct PA bits. > > > > > > A side effect of this series is that it will also fix the crash we've > > > encountered on Xeon E3-1220 as mentioned [1] due to the > > > differenciation of PA width. > > > > > > With [1], we've observed AMD host issues when with NPT=off. However a > > > funny fact is that after I reworked into this series, the tests can > > > instead pass on both NPT=on/off. It could be that the series changes > > > vm->pa_bits or other fields so something was affected. I didn't dig > > > more on that though, considering we should not lose anything. > > > > > > Any kind of smoke test would be greatly welcomed (especially on s390 > > > or ARM). Same to comments. Thanks, > > > > > > > The patches didn't apply cleanly for me on 9e8312f5e160, but once I got > > them applied I was able to run the aarch64 tests. Right, because I applied Thomas's s390x port as base [1], considering that that one should reach kvm/queue earlier (should be in the submaintainer's tree and waiting for a pull). Maybe I should post against the current kvm/queue next time? After all this series does not modify anything of the s390x work so the conflict should be trivial. > > Oh, and after fixing 2/4 (vm->pa_bits) to fix compilation on aarch64 as > pointed out on that patch. Thanks for verifying and reviews! Yes I'll fix that up. Regards, -- Peter Xu
Re: linux-next: Fixes tags need some work in the mmc-fixes tree
On Thu, 29 Aug 2019 at 05:55, Stephen Rothwell wrote: > > Hi all, > > In commit > > f93be8a7a366 ("mmc: sdhci-sprd: clear the UHS-I modes read from registers") > > Fixes tag > > Fixes: fb8bd90f83c4 ("mmc: sdhci-sprd: Add Spreadtrum's initial host > > has these problem(s): > > - Subject has leading but no trailing parentheses > - Subject has leading but no trailing quotes > > In commit > > 218258427ce0 ("mms: sdhci-sprd: add SDHCI_QUIRK_BROKEN_CARD_DETECTION") > > Fixes tag > > Fixes: fb8bd90f83c4 ("mmc: sdhci-sprd: Add Spreadtrum's initial host > > has these problem(s): > > - Subject has leading but no trailing parentheses > - Subject has leading but no trailing quotes > > In commit > > 1236e62ef8de ("mmc: sdhci-sprd: add SDHCI_QUIRK2_PRESET_VALUE_BROKEN") > > Fixes tag > > Fixes: fb8bd90f83c4 ("mmc: sdhci-sprd: Add Spreadtrum's initial host > > has these problem(s): > > - Subject has leading but no trailing parentheses > - Subject has leading but no trailing quotes > > In commit > > 8180519b1be0 ("mmc: sdhci-sprd: add get_ro hook function") > > Fixes tag > > Fixes: fb8bd90f83c4 ("mmc: sdhci-sprd: Add Spreadtrum's initial host > > has these problem(s): > > - Subject has leading but no trailing parentheses > - Subject has leading but no trailing quotes > > In commit > > b4e4296cc206 ("mmc: sdhci-sprd: fixed incorrect clock divider") > > Fixes tag > > Fixes: fb8bd90f83c4 ("mmc: sdhci-sprd: Add Spreadtrum's initial host > > has these problem(s): > > - Subject has leading but no trailing parentheses > - Subject has leading but no trailing quotes > > Please do not split Fixes tags over more tha one line - even if the line > is more than 80 characters. I will send another patch-set. Thanks, Chunyan > > -- > Cheers, > Stephen Rothwell
Re: [PATCH 1/4] KVM: selftests: Move vm type into _vm_create() internally
On Wed, Aug 28, 2019 at 01:23:57PM +0200, Andrew Jones wrote: [...] > > > { > > struct kvm_vm *vm; > > > > @@ -139,8 +139,7 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, > > uint64_t phy_pages, > > TEST_ASSERT(vm != NULL, "Insufficient Memory"); > > > > vm->mode = mode; > > - vm->type = type; > > - vm_open(vm, perm, type); > > + vm->type = 0; > > > > /* Setup mode specific traits. */ > > switch (vm->mode) { > > @@ -190,6 +189,13 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, > > uint64_t phy_pages, > > TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); > > } > > > > +#ifdef __aarch64__ > > + if (vm->pa_bits != 40) > > + vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(guest_pa_bits); > ^^ > should be vm->pa_bits Thanks for spotting it! Fixed this and also the rest of indent issues. Regards, -- Peter Xu
Re: [PATCH] thermal: armada: Fix -Wshift-negative-value
On Wed, 2019-08-28 at 11:49 -0700, Nick Desaulniers wrote: > On Wed, Aug 28, 2019 at 1:53 AM Zhang Rui > wrote: > > > > On Mon, 2019-08-19 at 10:21 +0200, Miquel Raynal wrote: > > > Hello, > > > > > > Daniel Lezcano wrote on Thu, 15 Aug > > > 2019 > > > 01:06:21 +0200: > > > > > > > On 15/08/2019 00:12, Nick Desaulniers wrote: > > > > > On Tue, Aug 13, 2019 at 10:28 AM 'Nathan Huckleberry' via > > > > > Clang > > > > > Built > > > > > Linux wrote: > > > > > > > > > > > > Following up to see if this patch is going to be accepted. > > > > > > > > > > Miquel is listed as the maintainer of this file in > > > > > MAINTAINERS. > > > > > Miquel, can you please pick this up? Otherwise Zhang, > > > > > Eduardo, > > > > > and > > > > > Daniel are listed as maintainers for drivers/thermal/. > > > > > > > > I'm listed as reviewer, it is up to Zhang or Eduardo to take > > > > the > > > > patches. > > > > > > > > > > > > > > Sorry for the delay, I don't manage a tree for this driver, I'll > > > let > > > Zhang or Eduardo take the patch with my > > > > > > Acked-by: Miquel Raynal > > > > > > > Patch applied. > > > > thanks, > > rui > > Thanks Rui, did you push the branch? I guess I would have expected > it > in > https://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux.git/log/?h=next > ? > I'm trying to track where this lands in > https://github.com/ClangBuiltLinux/linux/issues/532. Not yet. I will push it to kernel.org after I finish my internal build test. thanks, rui >
Re: [PATCH v2 1/2] dt-bindings: phy: intel-sdxc-phy: Add YAML schema for LGM SDXC PHY
Hi Langer, Thank you for the review comments. On 28/8/2019 11:37 PM, Langer, Thomas wrote: Hi Vadivel, +... diff --git a/Documentation/devicetree/bindings/phy/intel,syscon.yaml b/Documentation/devicetree/bindings/phy/intel,syscon.yaml new file mode 100644 index ..d0b78805e49f --- /dev/null +++ b/Documentation/devicetree/bindings/phy/intel,syscon.yaml @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/intel,syscon.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Syscon for eMMC/SDXC PHY Device Tree Bindings This says the binding is for eMMC/SDXC Agreed, fix it. + +maintainers: + - Ramuthevar Vadivel Murugan + +properties: + compatible: +const: intel,syscon but this is a generic syscon, behind which are many registers, not only for eMMC/SDXC. Also, the registers will be different for each SoC and needed for many different drivers, that is why in your example it is called "chiptop" -> toplevel registers not belonging to a specific HW module. Rob: Do you also think this "intel,syscon" is too generic? And the binding should be outside the "phy" folder? What is the way to support different SoCs with this? Must the driver referencing this syscon be aware of these differences? [Vadivel] : most of the IP drivers are using syscon, please suggest me to keep in the right place since it is common to all(w.r.t Intel's Lightning Mountain). Best Regards Vadivel + + reg: +maxItems: 1 + + "#reset-cells": + const: 1 + +required: + - compatible + - reg + - "#reset-cells" + +examples: + - | +sysconf: chiptop@e002 { + compatible = "intel,syscon", "syscon"; + reg = <0xe002 0x100>; + #reset-cells = <1>; +}; -- 2.11.0 Best regards, Thomas
[RESEND PATCH v2 1/5] mmc: sdhci-sprd: fixed incorrect clock divider
From: Chunyan Zhang The register SDHCI_CLOCK_CONTROL should be cleared before config clock divider, otherwise the frequency configured maybe lower than we expected. Fixes: fb8bd90f83c4 ("mmc: sdhci-sprd: Add Spreadtrum's initial host controller") Signed-off-by: Chunyan Zhang Signed-off-by: Chunyan Zhang Reviewed-by: Baolin Wang Tested-by: Baolin Wang --- drivers/mmc/host/sdhci-sprd.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index 6ee340a3fb3a..d5871865a1e9 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -217,10 +217,11 @@ static inline void _sdhci_sprd_set_clock(struct sdhci_host *host, struct sdhci_sprd_host *sprd_host = TO_SPRD_HOST(host); u32 div, val, mask; - div = sdhci_sprd_calc_div(sprd_host->base_rate, clk); + sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL); - clk |= ((div & 0x300) >> 2) | ((div & 0xFF) << 8); - sdhci_enable_clk(host, clk); + div = sdhci_sprd_calc_div(sprd_host->base_rate, clk); + div = ((div & 0x300) >> 2) | ((div & 0xFF) << 8); + sdhci_enable_clk(host, div); /* enable auto gate sdhc_enable_auto_gate */ val = sdhci_readl(host, SDHCI_SPRD_REG_32_BUSY_POSI); -- 2.20.1
[RESEND PATCH v2 0/5] a few fixes for sprd's sd host controller
With this patch-set, both sd card and mmc can be setup. This patch-set was verified on Unisoc's Whale2 and another mobile phone platform SC9863A. Changes from v1: - Added Reviewed-by and Tested-by of Baolin; - Added fixes tag for all patches in this series. Chunyan Zhang (5): mmc: sdhci-sprd: fixed incorrect clock divider mmc: sdhci-sprd: add get_ro hook function mmc: sdhci-sprd: add SDHCI_QUIRK2_PRESET_VALUE_BROKEN mms: sdhci-sprd: add SDHCI_QUIRK_BROKEN_CARD_DETECTION mmc: sdhci-sprd: clear the UHS-I modes read from registers drivers/mmc/host/sdhci-sprd.c | 30 +- 1 file changed, 25 insertions(+), 5 deletions(-) -- 2.20.1
[RESEND PATCH v2 2/5] mmc: sdhci-sprd: add get_ro hook function
From: Chunyan Zhang sprd's sd host controller doesn't support write protect to sd card. Fixes: fb8bd90f83c4 ("mmc: sdhci-sprd: Add Spreadtrum's initial host controller") Signed-off-by: Chunyan Zhang Signed-off-by: Chunyan Zhang Reviewed-by: Baolin Wang Tested-by: Baolin Wang --- drivers/mmc/host/sdhci-sprd.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index d5871865a1e9..90cb2af91159 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -374,6 +374,11 @@ static unsigned int sdhci_sprd_get_max_timeout_count(struct sdhci_host *host) return 1 << 31; } +static unsigned int sdhci_sprd_get_ro(struct sdhci_host *host) +{ + return 0; +} + static struct sdhci_ops sdhci_sprd_ops = { .read_l = sdhci_sprd_readl, .write_l = sdhci_sprd_writel, @@ -386,6 +391,7 @@ static struct sdhci_ops sdhci_sprd_ops = { .set_uhs_signaling = sdhci_sprd_set_uhs_signaling, .hw_reset = sdhci_sprd_hw_reset, .get_max_timeout_count = sdhci_sprd_get_max_timeout_count, + .get_ro = sdhci_sprd_get_ro, }; static void sdhci_sprd_request(struct mmc_host *mmc, struct mmc_request *mrq) -- 2.20.1
[RESEND PATCH v2 4/5] mms: sdhci-sprd: add SDHCI_QUIRK_BROKEN_CARD_DETECTION
From: Chunyan Zhang sprd's sd host controller doesn't support detection to card insert or remove. Fixes: fb8bd90f83c4 ("mmc: sdhci-sprd: Add Spreadtrum's initial host controller") Signed-off-by: Chunyan Zhang Signed-off-by: Chunyan Zhang Reviewed-by: Baolin Wang Tested-by: Baolin Wang --- drivers/mmc/host/sdhci-sprd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index 27d0b57f3f89..1fecf055682c 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -508,7 +508,8 @@ static void sdhci_sprd_phy_param_parse(struct sdhci_sprd_host *sprd_host, } static const struct sdhci_pltfm_data sdhci_sprd_pdata = { - .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK, + .quirks = SDHCI_QUIRK_BROKEN_CARD_DETECTION | + SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK, .quirks2 = SDHCI_QUIRK2_BROKEN_HS200 | SDHCI_QUIRK2_USE_32BIT_BLK_CNT | SDHCI_QUIRK2_PRESET_VALUE_BROKEN, -- 2.20.1
[RESEND PATCH v2 3/5] mmc: sdhci-sprd: add SDHCI_QUIRK2_PRESET_VALUE_BROKEN
From: Chunyan Zhang The bit of PRESET_VAL_ENABLE in HOST_CONTROL2 register is reserved on sprd's sd host controller, set quirk2 to disable configuring this. Fixes: fb8bd90f83c4 ("mmc: sdhci-sprd: Add Spreadtrum's initial host controller") Signed-off-by: Chunyan Zhang Signed-off-by: Chunyan Zhang Reviewed-by: Baolin Wang Tested-by: Baolin Wang --- drivers/mmc/host/sdhci-sprd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index 90cb2af91159..27d0b57f3f89 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -510,7 +510,8 @@ static void sdhci_sprd_phy_param_parse(struct sdhci_sprd_host *sprd_host, static const struct sdhci_pltfm_data sdhci_sprd_pdata = { .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK, .quirks2 = SDHCI_QUIRK2_BROKEN_HS200 | - SDHCI_QUIRK2_USE_32BIT_BLK_CNT, + SDHCI_QUIRK2_USE_32BIT_BLK_CNT | + SDHCI_QUIRK2_PRESET_VALUE_BROKEN, .ops = &sdhci_sprd_ops, }; -- 2.20.1
Re: [PATCH] ARC: unwind: Mark expected switch fall-through
Hi, Friendly ping: Who can take this, please? Thanks -- Gustavo On 8/20/19 8:29 PM, Gustavo A. R. Silva wrote: > Mark switch cases where we are expecting to fall through. > > This patch fixes the following warnings (Building: haps_hs_defconfig arc): > > arch/arc/kernel/unwind.c: In function ‘read_pointer’: > ./include/linux/compiler.h:328:5: warning: this statement may fall through > [-Wimplicit-fallthrough=] > do {\ > ^ > ./include/linux/compiler.h:338:2: note: in expansion of macro > ‘__compiletime_assert’ > __compiletime_assert(condition, msg, prefix, suffix) > ^~~~ > ./include/linux/compiler.h:350:2: note: in expansion of macro > ‘_compiletime_assert’ > _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) > ^~~ > ./include/linux/build_bug.h:39:37: note: in expansion of macro > ‘compiletime_assert’ > #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) > ^~ > ./include/linux/build_bug.h:50:2: note: in expansion of macro > ‘BUILD_BUG_ON_MSG’ > BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition) > ^~~~ > arch/arc/kernel/unwind.c:573:3: note: in expansion of macro ‘BUILD_BUG_ON’ >BUILD_BUG_ON(sizeof(u32) != sizeof(value)); >^~~~ > arch/arc/kernel/unwind.c:575:2: note: here > case DW_EH_PE_native: > ^~~~ > > Signed-off-by: Gustavo A. R. Silva > --- > arch/arc/kernel/unwind.c | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c > index 445e4d702f43..dc05a63516f5 100644 > --- a/arch/arc/kernel/unwind.c > +++ b/arch/arc/kernel/unwind.c > @@ -572,6 +572,7 @@ static unsigned long read_pointer(const u8 **pLoc, const > void *end, > #else > BUILD_BUG_ON(sizeof(u32) != sizeof(value)); > #endif > + /* Fall through */ > case DW_EH_PE_native: > if (end < (const void *)(ptr.pul + 1)) > return 0; >
[RESEND PATCH v2 5/5] mmc: sdhci-sprd: clear the UHS-I modes read from registers
From: Chunyan Zhang sprd's sd host controller supports SDR50/SDR104/DDR50 though, the UHS-I mode used by the specific card can be selected via devicetree only. Fixes: fb8bd90f83c4 ("mmc: sdhci-sprd: Add Spreadtrum's initial host controller") Signed-off-by: Chunyan Zhang Signed-off-by: Chunyan Zhang Reviewed-by: Baolin Wang Tested-by: Baolin Wang --- drivers/mmc/host/sdhci-sprd.c | 13 - 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index 1fecf055682c..d3c3e95676f0 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -509,7 +509,8 @@ static void sdhci_sprd_phy_param_parse(struct sdhci_sprd_host *sprd_host, static const struct sdhci_pltfm_data sdhci_sprd_pdata = { .quirks = SDHCI_QUIRK_BROKEN_CARD_DETECTION | - SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK, + SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | + SDHCI_QUIRK_MISSING_CAPS, .quirks2 = SDHCI_QUIRK2_BROKEN_HS200 | SDHCI_QUIRK2_USE_32BIT_BLK_CNT | SDHCI_QUIRK2_PRESET_VALUE_BROKEN, @@ -614,6 +615,16 @@ static int sdhci_sprd_probe(struct platform_device *pdev) sdhci_enable_v4_mode(host); + /* +* Supply the existing CAPS, but clear the UHS-I modes. This +* will allow these modes to be specified only by device +* tree properties through mmc_of_parse(). +*/ + host->caps = sdhci_readl(host, SDHCI_CAPABILITIES); + host->caps1 = sdhci_readl(host, SDHCI_CAPABILITIES_1); + host->caps1 &= ~(SDHCI_SUPPORT_SDR50 | SDHCI_SUPPORT_SDR104 | +SDHCI_SUPPORT_DDR50); + ret = sdhci_setup_host(host); if (ret) goto pm_runtime_disable; -- 2.20.1
[PATCH v14 00/10] support gce on mt8183 platform
Changes since v13: - separate poll function as poll w/ & w/o mask function - directly pass inst into append_command function instead of returns a pointer - fixup coding style - rebase onto 5.3-rc1 Changes since v12: - clear the value of command ptr address - fixup some typo and remove unused define Changes since v11: - correct some data type to avoid type conversion Changes since v10: - remove subsys-cell from gce device node - use of_parse_phandle_with_fixed_args instead of of_parse_phandle_with_args Changes since v8 and v9: - change the error return code in cmdq_dev_get_client_reg() Changes since v7: - remove the memory allocation out of cmdq_dev_get_client_reg() - rebase onto 5.2-rc1 Changes since v6: - remove cmdq_dev_get_event function and gce event property - separate some changes to indepentent patch - change the binding document related to gce-client-reg property Changes since v5: - fix typo - remove gce-event-name form the dt-binding - add reasons in commit message Changes since v4: - refine the architecture of the packet encoder function - refine the gce enevt property - change the patch's title Changes since v3: - fix a typo in dt-binding and dtsi - cast the return value to right format Changes since v2: - according to CK's review comment, change the property name and refine the parameter - change the patch's title - remove unused property from dt-binding and dts Changes since v1: - add prefix "cmdq" in the commit subject - add dt-binding document for get event and subsys function - add fix up tag in fixup patch - fix up some coding style (alignment) MTK will support gce function on mt8183 platform. dt-binding: gce: add gce header file for mt8183 mailbox: mediatek: cmdq: support mt8183 gce function arm64: dts: add gce node for mt8183 Besides above patches, we refine gce driver on those patches. mailbox: mediatek: cmdq: move the CMDQ_IRQ_MASK into cmdq driver data mailbox: mediatek: cmdq: clear the event in cmdq initial flow In order to enhance the convenience of gce usage, we add new helper functions and refine the method of instruction combining. dt-binding: gce: remove thread-num property dt-binding: gce: add binding for gce client reg property soc: mediatek: cmdq: define the instruction struct soc: mediatek: cmdq: add polling function soc: mediatek: cmdq: add cmdq_dev_get_client_reg function Bibby Hsieh (10): dt-binding: gce: remove thread-num property dt-binding: gce: add gce header file for mt8183 dt-binding: gce: add binding for gce client reg property mailbox: mediatek: cmdq: move the CMDQ_IRQ_MASK into cmdq driver data mailbox: mediatek: cmdq: support mt8183 gce function mailbox: mediatek: cmdq: clear the event in cmdq initial flow soc: mediatek: cmdq: define the instruction struct soc: mediatek: cmdq: add polling function soc: mediatek: cmdq: add cmdq_dev_get_client_reg function arm64: dts: add gce node for mt8183 .../devicetree/bindings/mailbox/mtk-gce.txt | 23 ++- arch/arm64/boot/dts/mediatek/mt8183.dtsi | 10 + drivers/mailbox/mtk-cmdq-mailbox.c| 18 +- drivers/soc/mediatek/mtk-cmdq-helper.c| 136 +++--- include/dt-bindings/gce/mt8183-gce.h | 175 ++ include/linux/mailbox/mtk-cmdq-mailbox.h | 14 ++ include/linux/soc/mediatek/mtk-cmdq.h | 56 +- 7 files changed, 390 insertions(+), 42 deletions(-) create mode 100644 include/dt-bindings/gce/mt8183-gce.h -- 2.18.0
[PATCH v14 01/10] dt-binding: gce: remove thread-num property
"thread-num" is an unused property so we remove it from example. Signed-off-by: Bibby Hsieh Reviewed-by: Rob Herring --- Documentation/devicetree/bindings/mailbox/mtk-gce.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/mailbox/mtk-gce.txt b/Documentation/devicetree/bindings/mailbox/mtk-gce.txt index 7d72b21c9e94..cfe40b01d164 100644 --- a/Documentation/devicetree/bindings/mailbox/mtk-gce.txt +++ b/Documentation/devicetree/bindings/mailbox/mtk-gce.txt @@ -39,7 +39,6 @@ Example: interrupts = ; clocks = <&infracfg CLK_INFRA_GCE>; clock-names = "gce"; - thread-num = CMDQ_THR_MAX_COUNT; #mbox-cells = <3>; }; -- 2.18.0
[PATCH v14 06/10] soc: mediatek: cmdq: clear the event in cmdq initial flow
GCE hardware stored event information in own internal sysram, if the initial value in those sysram is not zero value it will cause a situation that gce can wait the event immediately after client ask gce to wait event but not really trigger the corresponding hardware. In order to make sure that the wait event function is exactly correct, we need to clear the sysram value in cmdq initial flow. Fixes: 623a6143a845 ("mailbox: mediatek: Add Mediatek CMDQ driver") Signed-off-by: Bibby Hsieh Reviewed-by: CK Hu --- drivers/mailbox/mtk-cmdq-mailbox.c | 5 + include/linux/mailbox/mtk-cmdq-mailbox.h | 2 ++ include/linux/soc/mediatek/mtk-cmdq.h| 3 --- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c index 69daaadc3a5f..9a6ce9f5a7db 100644 --- a/drivers/mailbox/mtk-cmdq-mailbox.c +++ b/drivers/mailbox/mtk-cmdq-mailbox.c @@ -21,6 +21,7 @@ #define CMDQ_NUM_CMD(t)(t->cmd_buf_size / CMDQ_INST_SIZE) #define CMDQ_CURR_IRQ_STATUS 0x10 +#define CMDQ_SYNC_TOKEN_UPDATE 0x68 #define CMDQ_THR_SLOT_CYCLES 0x30 #define CMDQ_THR_BASE 0x100 #define CMDQ_THR_SIZE 0x80 @@ -104,8 +105,12 @@ static void cmdq_thread_resume(struct cmdq_thread *thread) static void cmdq_init(struct cmdq *cmdq) { + int i; + WARN_ON(clk_enable(cmdq->clock) < 0); writel(CMDQ_THR_ACTIVE_SLOT_CYCLES, cmdq->base + CMDQ_THR_SLOT_CYCLES); + for (i = 0; i <= CMDQ_MAX_EVENT; i++) + writel(i, cmdq->base + CMDQ_SYNC_TOKEN_UPDATE); clk_disable(cmdq->clock); } diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h index ccb73422c2fa..911475da7a53 100644 --- a/include/linux/mailbox/mtk-cmdq-mailbox.h +++ b/include/linux/mailbox/mtk-cmdq-mailbox.h @@ -19,6 +19,8 @@ #define CMDQ_WFE_UPDATEBIT(31) #define CMDQ_WFE_WAIT BIT(15) #define CMDQ_WFE_WAIT_VALUE0x1 +/** cmdq event maximum */ +#define CMDQ_MAX_EVENT 0x3ff /* * CMDQ_CODE_MASK: diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h index f3ae45d02e80..9618debb9ceb 100644 --- a/include/linux/soc/mediatek/mtk-cmdq.h +++ b/include/linux/soc/mediatek/mtk-cmdq.h @@ -13,9 +13,6 @@ #define CMDQ_NO_TIMEOUT0xu -/** cmdq event maximum */ -#define CMDQ_MAX_EVENT 0x3ff - struct cmdq_pkt; struct cmdq_client { -- 2.18.0
[PATCH v14 04/10] mailbox: mediatek: cmdq: move the CMDQ_IRQ_MASK into cmdq driver data
The interrupt mask and thread number has positive correlation, so we move the CMDQ_IRQ_MASK into cmdq driver data and calculate it by thread number. Signed-off-by: Bibby Hsieh Reviewed-by: CK Hu Reviewed-by: Matthias Brugger --- drivers/mailbox/mtk-cmdq-mailbox.c | 12 +++- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c index 00d5219094e5..8fddd26288e8 100644 --- a/drivers/mailbox/mtk-cmdq-mailbox.c +++ b/drivers/mailbox/mtk-cmdq-mailbox.c @@ -18,7 +18,6 @@ #include #define CMDQ_OP_CODE_MASK (0xff << CMDQ_OP_CODE_SHIFT) -#define CMDQ_IRQ_MASK 0x #define CMDQ_NUM_CMD(t)(t->cmd_buf_size / CMDQ_INST_SIZE) #define CMDQ_CURR_IRQ_STATUS 0x10 @@ -72,6 +71,7 @@ struct cmdq { void __iomem*base; u32 irq; u32 thread_nr; + u32 irq_mask; struct cmdq_thread *thread; struct clk *clock; boolsuspended; @@ -285,11 +285,11 @@ static irqreturn_t cmdq_irq_handler(int irq, void *dev) unsigned long irq_status, flags = 0L; int bit; - irq_status = readl(cmdq->base + CMDQ_CURR_IRQ_STATUS) & CMDQ_IRQ_MASK; - if (!(irq_status ^ CMDQ_IRQ_MASK)) + irq_status = readl(cmdq->base + CMDQ_CURR_IRQ_STATUS) & cmdq->irq_mask; + if (!(irq_status ^ cmdq->irq_mask)) return IRQ_NONE; - for_each_clear_bit(bit, &irq_status, fls(CMDQ_IRQ_MASK)) { + for_each_clear_bit(bit, &irq_status, cmdq->thread_nr) { struct cmdq_thread *thread = &cmdq->thread[bit]; spin_lock_irqsave(&thread->chan->lock, flags); @@ -473,6 +473,9 @@ static int cmdq_probe(struct platform_device *pdev) dev_err(dev, "failed to get irq\n"); return -EINVAL; } + + cmdq->thread_nr = (u32)(unsigned long)of_device_get_match_data(dev); + cmdq->irq_mask = GENMASK(cmdq->thread_nr - 1, 0); err = devm_request_irq(dev, cmdq->irq, cmdq_irq_handler, IRQF_SHARED, "mtk_cmdq", cmdq); if (err < 0) { @@ -489,7 +492,6 @@ static int cmdq_probe(struct platform_device *pdev) return PTR_ERR(cmdq->clock); } - cmdq->thread_nr = (u32)(unsigned long)of_device_get_match_data(dev); cmdq->mbox.dev = dev; cmdq->mbox.chans = devm_kcalloc(dev, cmdq->thread_nr, sizeof(*cmdq->mbox.chans), GFP_KERNEL); -- 2.18.0
[PATCH v14 10/10] arm64: dts: add gce node for mt8183
add gce device node for mt8183 Signed-off-by: Bibby Hsieh --- arch/arm64/boot/dts/mediatek/mt8183.dtsi | 10 ++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi index 66aaa07f6cec..52b9af38a00a 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi @@ -9,6 +9,7 @@ #include #include #include +#include #include "mt8183-pinfunc.h" / { @@ -321,6 +322,15 @@ status = "disabled"; }; + gce: mailbox@10238000 { + compatible = "mediatek,mt8183-gce"; + reg = <0 0x10238000 0 0x4000>; + interrupts = ; + #mbox-cells = <3>; + clocks = <&infracfg CLK_INFRA_GCE>; + clock-names = "gce"; + }; + uart0: serial@11002000 { compatible = "mediatek,mt8183-uart", "mediatek,mt6577-uart"; -- 2.18.0
[PATCH v14 02/10] dt-binding: gce: add gce header file for mt8183
Add documentation for the mt8183 gce. Add gce header file defined the gce hardware event, subsys number and constant for mt8183. Signed-off-by: Bibby Hsieh Reviewed-by: Rob Herring --- .../devicetree/bindings/mailbox/mtk-gce.txt | 6 +- include/dt-bindings/gce/mt8183-gce.h | 175 ++ 2 files changed, 178 insertions(+), 3 deletions(-) create mode 100644 include/dt-bindings/gce/mt8183-gce.h diff --git a/Documentation/devicetree/bindings/mailbox/mtk-gce.txt b/Documentation/devicetree/bindings/mailbox/mtk-gce.txt index cfe40b01d164..1f7f8f2a3f49 100644 --- a/Documentation/devicetree/bindings/mailbox/mtk-gce.txt +++ b/Documentation/devicetree/bindings/mailbox/mtk-gce.txt @@ -9,7 +9,7 @@ CMDQ driver uses mailbox framework for communication. Please refer to mailbox.txt for generic information about mailbox device-tree bindings. Required properties: -- compatible: Must be "mediatek,mt8173-gce" +- compatible: can be "mediatek,mt8173-gce" or "mediatek,mt8183-gce" - reg: Address range of the GCE unit - interrupts: The interrupt signal from the GCE block - clock: Clocks according to the common clock binding @@ -28,8 +28,8 @@ Required properties for a client device: - mediatek,gce-subsys: u32, specify the sub-system id which is corresponding to the register address. -Some vaules of properties are defined in 'dt-bindings/gce/mt8173-gce.h'. Such as -sub-system ids, thread priority, event ids. +Some vaules of properties are defined in 'dt-bindings/gce/mt8173-gce.h' +or 'dt-binding/gce/mt8183-gce.h'. Such as sub-system ids, thread priority, event ids. Example: diff --git a/include/dt-bindings/gce/mt8183-gce.h b/include/dt-bindings/gce/mt8183-gce.h new file mode 100644 index ..29c967476f73 --- /dev/null +++ b/include/dt-bindings/gce/mt8183-gce.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2019 MediaTek Inc. + * Author: Bibby Hsieh + * + */ + +#ifndef _DT_BINDINGS_GCE_MT8183_H +#define _DT_BINDINGS_GCE_MT8183_H + +#define CMDQ_NO_TIMEOUT0x + +/* GCE HW thread priority */ +#define CMDQ_THR_PRIO_LOWEST 0 +#define CMDQ_THR_PRIO_HIGHEST 1 + +/* GCE SUBSYS */ +#define SUBSYS_13000 +#define SUBSYS_14001 +#define SUBSYS_14012 +#define SUBSYS_14023 +#define SUBSYS_15024 +#define SUBSYS_18805 +#define SUBSYS_18816 +#define SUBSYS_18827 +#define SUBSYS_18838 +#define SUBSYS_18849 +#define SUBSYS_100010 +#define SUBSYS_100111 +#define SUBSYS_100212 +#define SUBSYS_100313 +#define SUBSYS_100414 +#define SUBSYS_100515 +#define SUBSYS_102016 +#define SUBSYS_102817 +#define SUBSYS_170018 +#define SUBSYS_170119 +#define SUBSYS_170220 +#define SUBSYS_170321 +#define SUBSYS_180022 +#define SUBSYS_180123 +#define SUBSYS_180224 +#define SUBSYS_180425 +#define SUBSYS_180526 +#define SUBSYS_180827 +#define SUBSYS_180a28 +#define SUBSYS_180b29 + +#define CMDQ_EVENT_DISP_RDMA0_SOF 0 +#define CMDQ_EVENT_DISP_RDMA1_SOF 1 +#define CMDQ_EVENT_MDP_RDMA0_SOF 2 +#define CMDQ_EVENT_MDP_RSZ0_SOF 4 +#define CMDQ_EVENT_MDP_RSZ1_SOF 5 +#define CMDQ_EVENT_MDP_TDSHP_SOF 6 +#define CMDQ_EVENT_MDP_WROT0_SOF 7 +#define CMDQ_EVENT_MDP_WDMA0_SOF 8 +#define CMDQ_EVENT_DISP_OVL0_SOF 9 +#define CMDQ_EVENT_DISP_OVL0_2L_SOF10 +#define CMDQ_EVENT_DISP_OVL1_2L_SOF11 +#define CMDQ_EVENT_DISP_WDMA0_SOF 12 +#define CMDQ_EVENT_DISP_COLOR0_SOF 13 +#define CMDQ_EVENT_DISP_CCORR0_SOF 14 +#define CMDQ_EVENT_DISP_AAL0_SOF 15 +#define CMDQ_EVENT_DISP_GAMMA0_SOF 16 +#define CMDQ_EVENT_DISP_DITHER0_SOF17 +#define CMDQ_EVENT_DISP_PWM0_SOF 18 +#define CMDQ_EVENT_DISP_DSI0_SOF 19 +#define CMDQ_EVENT_DISP_DPI0_SOF 20 +#
[PATCH v14 07/10] soc: mediatek: cmdq: define the instruction struct
Define an instruction structure for gce driver to append command. This structure can make the client's code more readability. Signed-off-by: Bibby Hsieh Reviewed-by: CK Hu Reviewed-by: Houlong Wei --- drivers/soc/mediatek/mtk-cmdq-helper.c | 77 include/linux/mailbox/mtk-cmdq-mailbox.h | 10 +++ 2 files changed, 61 insertions(+), 26 deletions(-) diff --git a/drivers/soc/mediatek/mtk-cmdq-helper.c b/drivers/soc/mediatek/mtk-cmdq-helper.c index 7aa0517ff2f3..9472526ab076 100644 --- a/drivers/soc/mediatek/mtk-cmdq-helper.c +++ b/drivers/soc/mediatek/mtk-cmdq-helper.c @@ -9,12 +9,24 @@ #include #include -#define CMDQ_ARG_A_WRITE_MASK 0x #define CMDQ_WRITE_ENABLE_MASK BIT(0) #define CMDQ_EOC_IRQ_ENBIT(0) #define CMDQ_EOC_CMD ((u64)((CMDQ_CODE_EOC << CMDQ_OP_CODE_SHIFT)) \ << 32 | CMDQ_EOC_IRQ_EN) +struct cmdq_instruction { + union { + u32 value; + u32 mask; + }; + union { + u16 offset; + u16 event; + }; + u8 subsys; + u8 op; +}; + static void cmdq_client_timeout(struct timer_list *t) { struct cmdq_client *client = from_timer(client, t, timer); @@ -110,10 +122,10 @@ void cmdq_pkt_destroy(struct cmdq_pkt *pkt) } EXPORT_SYMBOL(cmdq_pkt_destroy); -static int cmdq_pkt_append_command(struct cmdq_pkt *pkt, enum cmdq_code code, - u32 arg_a, u32 arg_b) +static int cmdq_pkt_append_command(struct cmdq_pkt *pkt, + struct cmdq_instruction inst) { - u64 *cmd_ptr; + struct cmdq_instruction *cmd_ptr; if (unlikely(pkt->cmd_buf_size + CMDQ_INST_SIZE > pkt->buf_size)) { /* @@ -129,8 +141,9 @@ static int cmdq_pkt_append_command(struct cmdq_pkt *pkt, enum cmdq_code code, __func__, (u32)pkt->buf_size); return -ENOMEM; } + cmd_ptr = pkt->va_base + pkt->cmd_buf_size; - (*cmd_ptr) = (u64)((code << CMDQ_OP_CODE_SHIFT) | arg_a) << 32 | arg_b; + *cmd_ptr = inst; pkt->cmd_buf_size += CMDQ_INST_SIZE; return 0; @@ -138,24 +151,31 @@ static int cmdq_pkt_append_command(struct cmdq_pkt *pkt, enum cmdq_code code, int cmdq_pkt_write(struct cmdq_pkt *pkt, u8 subsys, u16 offset, u32 value) { - u32 arg_a = (offset & CMDQ_ARG_A_WRITE_MASK) | - (subsys << CMDQ_SUBSYS_SHIFT); + struct cmdq_instruction inst; + + inst.op = CMDQ_CODE_WRITE; + inst.value = value; + inst.offset = offset; + inst.subsys = subsys; - return cmdq_pkt_append_command(pkt, CMDQ_CODE_WRITE, arg_a, value); + return cmdq_pkt_append_command(pkt, inst); } EXPORT_SYMBOL(cmdq_pkt_write); int cmdq_pkt_write_mask(struct cmdq_pkt *pkt, u8 subsys, u16 offset, u32 value, u32 mask) { - u32 offset_mask = offset; + struct cmdq_instruction inst = { {0} }; + u16 offset_mask = offset; int err = 0; if (mask != 0x) { - err = cmdq_pkt_append_command(pkt, CMDQ_CODE_MASK, 0, ~mask); + inst.op = CMDQ_CODE_MASK; + inst.mask = ~mask; + err = cmdq_pkt_append_command(pkt, inst); offset_mask |= CMDQ_WRITE_ENABLE_MASK; } - err |= cmdq_pkt_write(pkt, value, subsys, offset_mask); + err |= cmdq_pkt_write(pkt, subsys, offset_mask, value); return err; } @@ -163,43 +183,48 @@ EXPORT_SYMBOL(cmdq_pkt_write_mask); int cmdq_pkt_wfe(struct cmdq_pkt *pkt, u16 event) { - u32 arg_b; + struct cmdq_instruction inst = { {0} }; if (event >= CMDQ_MAX_EVENT) return -EINVAL; - /* -* WFE arg_b -* bit 0-11: wait value -* bit 15: 1 - wait, 0 - no wait -* bit 16-27: update value -* bit 31: 1 - update, 0 - no update -*/ - arg_b = CMDQ_WFE_UPDATE | CMDQ_WFE_WAIT | CMDQ_WFE_WAIT_VALUE; + inst.op = CMDQ_CODE_WFE; + inst.value = CMDQ_WFE_OPTION; + inst.event = event; - return cmdq_pkt_append_command(pkt, CMDQ_CODE_WFE, event, arg_b); + return cmdq_pkt_append_command(pkt, inst); } EXPORT_SYMBOL(cmdq_pkt_wfe); int cmdq_pkt_clear_event(struct cmdq_pkt *pkt, u16 event) { + struct cmdq_instruction inst = { {0} }; + if (event >= CMDQ_MAX_EVENT) return -EINVAL; - return cmdq_pkt_append_command(pkt, CMDQ_CODE_WFE, event, - CMDQ_WFE_UPDATE); + inst.op = CMDQ_CODE_WFE; + inst.value = CMDQ_WFE_UPDATE; + inst.event = event; + + return cmdq_pkt_append_command(pkt, inst); } EXPORT_SYMBOL(cmdq_pkt_clear_event); static int cmdq_pkt_finalize(struct cmdq_pkt *pkt) { - int err; + struct cmdq_instruction inst = { {0} }; + int err = 0; /* insert EO
[PATCH v14 05/10] mailbox: mediatek: cmdq: support mt8183 gce function
add mt8183 compatible name for supporting gce function Signed-off-by: Bibby Hsieh Reviewed-by: CK Hu --- drivers/mailbox/mtk-cmdq-mailbox.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c index 8fddd26288e8..69daaadc3a5f 100644 --- a/drivers/mailbox/mtk-cmdq-mailbox.c +++ b/drivers/mailbox/mtk-cmdq-mailbox.c @@ -539,6 +539,7 @@ static const struct dev_pm_ops cmdq_pm_ops = { static const struct of_device_id cmdq_of_ids[] = { {.compatible = "mediatek,mt8173-gce", .data = (void *)16}, + {.compatible = "mediatek,mt8183-gce", .data = (void *)24}, {} }; -- 2.18.0
[PATCH v14 09/10] soc: mediatek: cmdq: add cmdq_dev_get_client_reg function
GCE cannot know the register base address, this function can help cmdq client to get the cmdq_client_reg structure. Signed-off-by: Bibby Hsieh Reviewed-by: CK Hu Reviewed-by: Houlong Wei --- drivers/soc/mediatek/mtk-cmdq-helper.c | 29 ++ include/linux/soc/mediatek/mtk-cmdq.h | 21 +++ 2 files changed, 50 insertions(+) diff --git a/drivers/soc/mediatek/mtk-cmdq-helper.c b/drivers/soc/mediatek/mtk-cmdq-helper.c index bec7bb6c3988..3037fbf206ef 100644 --- a/drivers/soc/mediatek/mtk-cmdq-helper.c +++ b/drivers/soc/mediatek/mtk-cmdq-helper.c @@ -27,6 +27,35 @@ struct cmdq_instruction { u8 op; }; +int cmdq_dev_get_client_reg(struct device *dev, + struct cmdq_client_reg *client_reg, int idx) +{ + struct of_phandle_args spec; + int err; + + if (!client_reg) + return -ENOENT; + + err = of_parse_phandle_with_fixed_args(dev->of_node, + "mediatek,gce-client-reg", + 3, idx, &spec); + if (err < 0) { + dev_err(dev, + "error %d can't parse gce-client-reg property (%d)", + err, idx); + + return err; + } + + client_reg->subsys = (u8)spec.args[0]; + client_reg->offset = (u16)spec.args[1]; + client_reg->size = (u16)spec.args[2]; + of_node_put(spec.np); + + return 0; +} +EXPORT_SYMBOL(cmdq_dev_get_client_reg); + static void cmdq_client_timeout(struct timer_list *t) { struct cmdq_client *client = from_timer(client, t, timer); diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h index 92bd5b5c6341..a74c1d5acdf3 100644 --- a/include/linux/soc/mediatek/mtk-cmdq.h +++ b/include/linux/soc/mediatek/mtk-cmdq.h @@ -15,6 +15,12 @@ struct cmdq_pkt; +struct cmdq_client_reg { + u8 subsys; + u16 offset; + u16 size; +}; + struct cmdq_client { spinlock_t lock; u32 pkt_cnt; @@ -24,6 +30,21 @@ struct cmdq_client { u32 timeout_ms; /* in unit of microsecond */ }; +/** + * cmdq_dev_get_client_reg() - parse cmdq client reg from the device + *node of CMDQ client + * @dev: device of CMDQ mailbox client + * @client_reg: CMDQ client reg pointer + * @idx: the index of desired reg + * + * Return: 0 for success; else the error code is returned + * + * Help CMDQ client parsing the cmdq client reg + * from the device node of CMDQ client. + */ +int cmdq_dev_get_client_reg(struct device *dev, + struct cmdq_client_reg *client_reg, int idx); + /** * cmdq_mbox_create() - create CMDQ mailbox client and channel * @dev: device of CMDQ mailbox client -- 2.18.0
[PATCH v14 06/10] mailbox: mediatek: cmdq: clear the event in cmdq initial flow
GCE hardware stored event information in own internal sysram, if the initial value in those sysram is not zero value it will cause a situation that gce can wait the event immediately after client ask gce to wait event but not really trigger the corresponding hardware. In order to make sure that the wait event function is exactly correct, we need to clear the sysram value in cmdq initial flow. Fixes: 623a6143a845 ("mailbox: mediatek: Add Mediatek CMDQ driver") Signed-off-by: Bibby Hsieh Reviewed-by: CK Hu Reviewed-by: Matthias Brugger --- drivers/mailbox/mtk-cmdq-mailbox.c | 5 + include/linux/mailbox/mtk-cmdq-mailbox.h | 3 +++ include/linux/soc/mediatek/mtk-cmdq.h| 3 --- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c index 69daaadc3a5f..9a6ce9f5a7db 100644 --- a/drivers/mailbox/mtk-cmdq-mailbox.c +++ b/drivers/mailbox/mtk-cmdq-mailbox.c @@ -21,6 +21,7 @@ #define CMDQ_NUM_CMD(t)(t->cmd_buf_size / CMDQ_INST_SIZE) #define CMDQ_CURR_IRQ_STATUS 0x10 +#define CMDQ_SYNC_TOKEN_UPDATE 0x68 #define CMDQ_THR_SLOT_CYCLES 0x30 #define CMDQ_THR_BASE 0x100 #define CMDQ_THR_SIZE 0x80 @@ -104,8 +105,12 @@ static void cmdq_thread_resume(struct cmdq_thread *thread) static void cmdq_init(struct cmdq *cmdq) { + int i; + WARN_ON(clk_enable(cmdq->clock) < 0); writel(CMDQ_THR_ACTIVE_SLOT_CYCLES, cmdq->base + CMDQ_THR_SLOT_CYCLES); + for (i = 0; i <= CMDQ_MAX_EVENT; i++) + writel(i, cmdq->base + CMDQ_SYNC_TOKEN_UPDATE); clk_disable(cmdq->clock); } diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h index ccb73422c2fa..e6f54ef6698b 100644 --- a/include/linux/mailbox/mtk-cmdq-mailbox.h +++ b/include/linux/mailbox/mtk-cmdq-mailbox.h @@ -20,6 +20,9 @@ #define CMDQ_WFE_WAIT BIT(15) #define CMDQ_WFE_WAIT_VALUE0x1 +/** cmdq event maximum */ +#define CMDQ_MAX_EVENT 0x3ff + /* * CMDQ_CODE_MASK: * set write mask diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h index f3ae45d02e80..9618debb9ceb 100644 --- a/include/linux/soc/mediatek/mtk-cmdq.h +++ b/include/linux/soc/mediatek/mtk-cmdq.h @@ -13,9 +13,6 @@ #define CMDQ_NO_TIMEOUT0xu -/** cmdq event maximum */ -#define CMDQ_MAX_EVENT 0x3ff - struct cmdq_pkt; struct cmdq_client { -- 2.18.0
[PATCH v14 03/10] dt-binding: gce: add binding for gce client reg property
cmdq driver provide a function that get the relationship of sub system number from device node for client. add specification for #subsys-cells, mediatek,gce-client-reg. Signed-off-by: Bibby Hsieh Reviewed-by: Rob Herring --- .../devicetree/bindings/mailbox/mtk-gce.txt | 16 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/mailbox/mtk-gce.txt b/Documentation/devicetree/bindings/mailbox/mtk-gce.txt index 1f7f8f2a3f49..7b13787ab13d 100644 --- a/Documentation/devicetree/bindings/mailbox/mtk-gce.txt +++ b/Documentation/devicetree/bindings/mailbox/mtk-gce.txt @@ -25,8 +25,16 @@ Required properties: Required properties for a client device: - mboxes: Client use mailbox to communicate with GCE, it should have this property and list of phandle, mailbox specifiers. -- mediatek,gce-subsys: u32, specify the sub-system id which is corresponding - to the register address. +Optional properties for a client device: +- mediatek,gce-client-reg: Specify the sub-system id which is corresponding + to the register address, it should have this property and list of phandle, + sub-system specifiers. + <&phandle subsys_number start_offset size> + phandle: Label name of a gce node. + subsys_number: specify the sub-system id which is corresponding + to the register address. + start_offset: the start offset of register address that GCE can access. + size: the total size of register address that GCE can access. Some vaules of properties are defined in 'dt-bindings/gce/mt8173-gce.h' or 'dt-binding/gce/mt8183-gce.h'. Such as sub-system ids, thread priority, event ids. @@ -48,9 +56,9 @@ Example for a client device: compatible = "mediatek,mt8173-mmsys"; mboxes = <&gce 0 CMDQ_THR_PRIO_LOWEST 1>, <&gce 1 CMDQ_THR_PRIO_LOWEST 1>; - mediatek,gce-subsys = ; mutex-event-eof = ; - + mediatek,gce-client-reg = <&gce SUBSYS_1400 0x3000 0x1000>, + <&gce SUBSYS_1401 0x2000 0x100>; ... }; -- 2.18.0
[PATCH v14 08/10] soc: mediatek: cmdq: add polling function
add polling function in cmdq helper functions Signed-off-by: Bibby Hsieh Reviewed-by: CK Hu Reviewed-by: Houlong Wei --- drivers/soc/mediatek/mtk-cmdq-helper.c | 30 ++ include/linux/mailbox/mtk-cmdq-mailbox.h | 1 + include/linux/soc/mediatek/mtk-cmdq.h| 32 3 files changed, 63 insertions(+) diff --git a/drivers/soc/mediatek/mtk-cmdq-helper.c b/drivers/soc/mediatek/mtk-cmdq-helper.c index 9472526ab076..bec7bb6c3988 100644 --- a/drivers/soc/mediatek/mtk-cmdq-helper.c +++ b/drivers/soc/mediatek/mtk-cmdq-helper.c @@ -211,6 +211,36 @@ int cmdq_pkt_clear_event(struct cmdq_pkt *pkt, u16 event) } EXPORT_SYMBOL(cmdq_pkt_clear_event); +int cmdq_pkt_poll(struct cmdq_pkt *pkt, u8 subsys, + u16 offset, u32 value) +{ + struct cmdq_instruction inst; + + inst.op = CMDQ_CODE_POLL; + inst.value = value; + inst.offset = offset; + inst.subsys = subsys; + + return cmdq_pkt_append_command(pkt, inst); +} +EXPORT_SYMBOL(cmdq_pkt_poll); + +int cmdq_pkt_poll_mask(struct cmdq_pkt *pkt, u8 subsys, + u16 offset, u32 value, u32 mask) +{ + struct cmdq_instruction inst = { {0} }; + int err = 0; + + inst.op = CMDQ_CODE_MASK; + inst.mask = ~mask; + err = cmdq_pkt_append_command(pkt, inst); + offset = offset | 0x1; + err |= cmdq_pkt_poll(pkt, subsys, offset, value); + + return err; +} +EXPORT_SYMBOL(cmdq_pkt_poll_mask); + static int cmdq_pkt_finalize(struct cmdq_pkt *pkt) { struct cmdq_instruction inst = { {0} }; diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h index 678760548791..a4dc45fbec0a 100644 --- a/include/linux/mailbox/mtk-cmdq-mailbox.h +++ b/include/linux/mailbox/mtk-cmdq-mailbox.h @@ -55,6 +55,7 @@ enum cmdq_code { CMDQ_CODE_MASK = 0x02, CMDQ_CODE_WRITE = 0x04, + CMDQ_CODE_POLL = 0x08, CMDQ_CODE_JUMP = 0x10, CMDQ_CODE_WFE = 0x20, CMDQ_CODE_EOC = 0x40, diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h index 9618debb9ceb..92bd5b5c6341 100644 --- a/include/linux/soc/mediatek/mtk-cmdq.h +++ b/include/linux/soc/mediatek/mtk-cmdq.h @@ -99,6 +99,38 @@ int cmdq_pkt_wfe(struct cmdq_pkt *pkt, u16 event); */ int cmdq_pkt_clear_event(struct cmdq_pkt *pkt, u16 event); +/** + * cmdq_pkt_poll() - Append polling command to the CMDQ packet, ask GCE to + * execute an instruction that wait for a specified + * hardware register to check for the value w/o mask. + * All GCE hardware threads will be blocked by this + * instruction. + * @pkt: the CMDQ packet + * @subsys:the CMDQ sub system code + * @offset:register offset from CMDQ sub system + * @value: the specified target register value + * + * Return: 0 for success; else the error code is returned + */ +int cmdq_pkt_poll(struct cmdq_pkt *pkt, u8 subsys, + u16 offset, u32 value); + +/** + * cmdq_pkt_poll_mask() - Append polling command to the CMDQ packet, ask GCE to + * execute an instruction that wait for a specified + * hardware register to check for the value w/ mask. + * All GCE hardware threads will be blocked by this + * instruction. + * @pkt: the CMDQ packet + * @subsys:the CMDQ sub system code + * @offset:register offset from CMDQ sub system + * @value: the specified target register value + * @mask: the specified target register mask + * + * Return: 0 for success; else the error code is returned + */ +int cmdq_pkt_poll_mask(struct cmdq_pkt *pkt, u8 subsys, + u16 offset, u32 value, u32 mask); /** * cmdq_pkt_flush_async() - trigger CMDQ to asynchronously execute the CMDQ * packet and call back at the end of done packet -- 2.18.0
[PATCH] um: Rewrite host RNG driver.
The old driver had a bug that would cause it to outright stop working if the host's /dev/random were to block. Instead of trying to track down the cause of said bug, rewriting it from scratch turned out to be a much better option as it came with a few benefits: - The new driver properly registers itself as an hardware RNG. - The code is simpler and therefore easier to maintain. - It serves as a minimal example of writing a hardware RNG driver. I also edited the Kconfig symbol to bring it up to more modern standards. Signed-off-by: Alexander Neville --- arch/um/drivers/Makefile | 3 +- arch/um/drivers/random.c | 192 - drivers/char/hw_random/Kconfig | 21 ++-- 3 files changed, 59 insertions(+), 157 deletions(-) diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index 693319839f69..29b0364f267d 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -17,6 +17,7 @@ hostaudio-objs := hostaudio_kern.o ubd-objs := ubd_kern.o ubd_user.o port-objs := port_kern.o port_user.o harddog-objs := harddog_kern.o harddog_user.o +uml-rng-objs := random.o LDFLAGS_pcap.o := -r $(shell $(CC) $(KBUILD_CFLAGS) -print-file-name=libpcap.a) @@ -60,7 +61,7 @@ obj-$(CONFIG_TTY_CHAN) += tty.o obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o obj-$(CONFIG_UML_WATCHDOG) += harddog.o obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o -obj-$(CONFIG_UML_RANDOM) += random.o +obj-$(CONFIG_UML_RANDOM) += uml-rng.o # pcap_user.o must be added explicitly. USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index 1d5d3057e6f1..7a3099277ebd 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -1,175 +1,75 @@ -/* Copyright (C) 2005 - 2008 Jeff Dike */ - -/* Much of this ripped from drivers/char/hw_random.c, see there for other - * copyright. +// SPDX-License-Identifier: GPL-2.0 +/* + * UML Host RNG Driver + * + * (c) Copright 2019 Alexander Neville * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. */ -#include + +#include +#include #include -#include -#include -#include -#include -#include -#include -#include +#include +#include #include -/* - * core module and version information - */ -#define RNG_VERSION "1.0.0" -#define RNG_MODULE_NAME "hw_random" - -#define RNG_MISCDEV_MINOR 183 /* official */ - -/* Changed at init time, in the non-modular case, and at module load - * time, in the module case. Presumably, the module subsystem - * protects against a module being loaded twice at the same time. - */ -static int random_fd = -1; -static DECLARE_WAIT_QUEUE_HEAD(host_read_wait); - -static int rng_dev_open (struct inode *inode, struct file *filp) +static int uml_rng_read(struct hwrng *rng, void *data, size_t bufsize, + bool wait) { - /* enforce read-only access to this chrdev */ - if ((filp->f_mode & FMODE_READ) == 0) - return -EINVAL; - if ((filp->f_mode & FMODE_WRITE) != 0) - return -EINVAL; - - return 0; + return os_read_file(rng->priv, data, bufsize); } -static atomic_t host_sleep_count = ATOMIC_INIT(0); - -static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size, -loff_t *offp) +static int uml_rng_init(struct hwrng *rng) { - u32 data; - int n, ret = 0, have_data; - - while (size) { - n = os_read_file(random_fd, &data, sizeof(data)); - if (n > 0) { - have_data = n; - while (have_data && size) { - if (put_user((u8) data, buf++)) { - ret = ret ? : -EFAULT; - break; - } - size--; - ret++; - have_data--; - data >>= 8; - } - } - else if (n == -EAGAIN) { - DECLARE_WAITQUEUE(wait, current); - - if (filp->f_flags & O_NONBLOCK) - return ret ? : -EAGAIN; - - atomic_inc(&host_sleep_count); - add_sigio_fd(random_fd); - - add_wait_queue(&host_read_wait, &wait); - set_current_state(TASK_INTERRUPTIBLE); - - schedule(); - remove_wait_queue(&host_read_wait, &wait); - - if (atomic_de
Re: linux-next: Tree for Aug 27 (objtool)
Hi Josh, On Wed, 28 Aug 2019 11:34:33 -0500 Josh Poimboeuf wrote: > On Wed, Aug 28, 2019 at 11:13:31AM -0500, Josh Poimboeuf wrote: > > Turns out this patch does break something: > > > > arch/x86/xen/enlighten_pv.o: warning: objtool: xen_cpuid()+0x25: can't > > find jump dest instruction at .text+0x9c > > > > I'll need to figure out a better way to whitelist that > > XEN_EMULATE_PREFIX fake instruction thing. I'll probably just teach > > the objtool decoder about it. > > Hi Masami, > > Is it possible for the kernel x86 decoder to recognize the > XEN_EMULATE_PREFIX prefix? > > asm(XEN_EMULATE_PREFIX "cpuid" > : "=a" (*ax), > "=b" (*bx), > "=c" (*cx), > "=d" (*dx) > : "0" (*ax), "2" (*cx)); > > is disassembled to: > > 33: 0f 0b ud2 > 35: 78 65 js 9c > 37: 6e outsb %ds:(%rsi),(%dx) > 38: 0f a2 cpuid > > which confuses objtool. Presumably that would confuse other users of > the decoder as well. Good catch! It should be problematic, since x86 decoder sanity test is based on objtool. But I don't want to change the test code itself, because this problem is highly depending on Xen. > That's a highly unlikely sequence of instructions, maybe the kernel > decoder should recognize it as a single instruction. OK, it is better to be done in decoder (only for CONFIG_XEN_PVHVM) BTW, could you also share what test case would you using? And what about attached patch? (just compile checked with/without CONFIG_XEN_PVHVM) Thank you, -- Masami Hiramatsu >From 9a46833c54fd320afd3836c0e51ade82e4bc6f96 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 29 Aug 2019 10:01:55 +0900 Subject: [PATCH] x86: xen: insn: Decode XEN_EMULATE_PREFIX correctly Add XEN_EMULATE_PREFIX prefix support to x86 insn decoder. This treats a special sequence of instructions of XEN_EMULATE_PREFIX as a prefix bytes in x86 insn decoder only if CONFIG_XEN_PVHVM=y. Note that this prefix is treated as just a dummy code. Reported-by: Josh Poimboeuf Signed-off-by: Masami Hiramatsu --- arch/x86/include/asm/xen/interface.h | 8 +-- arch/x86/lib/insn.c | 35 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index 62ca03ef5c65..fbee520b1f07 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -27,6 +27,8 @@ #ifndef _ASM_X86_XEN_INTERFACE_H #define _ASM_X86_XEN_INTERFACE_H +#include + /* * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field * in a struct in memory. @@ -379,11 +381,13 @@ struct xen_pmu_arch { * Prefix forces emulation of some non-trapping instructions. * Currently only CPUID. */ +#define __XEN_EMULATE_PREFIX 0x0f,0x0b,0x78,0x65,0x6e +#define __XEN_EMULATE_PREFIX_STR __stringify(__XEN_EMULATE_PREFIX) #ifdef __ASSEMBLY__ -#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ; +#define XEN_EMULATE_PREFIX .byte __XEN_EMULATE_PREFIX ; #define XEN_CPUID XEN_EMULATE_PREFIX cpuid #else -#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; " +#define XEN_EMULATE_PREFIX ".byte " __XEN_EMULATE_PREFIX_STR " ; " #define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" #endif diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 0b5862ba6a75..2401a6fc9509 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -7,6 +7,9 @@ #ifdef __KERNEL__ #include +#include +/* For special Xen prefix */ +#include #else #include #endif @@ -58,6 +61,34 @@ void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) insn->addr_bytes = 4; } +#ifdef CONFIG_XEN_PVHVM +static const insn_byte_t xen_prefix[] = { XEN_EMULATE_PREFIX }; + +static int insn_xen_prefix(struct insn *insn, insn_byte_t b) +{ + struct insn_field *prefixes = &insn->prefixes; + int i = 0; + + while (i < ARRAY_SIZE(xen_prefix) && b == xen_prefix[i]) + b = peek_nbyte_next(insn_byte_t, insn, ++i); + + if (unlikely(i == ARRAY_SIZE(xen_prefix))) { + memcpy(prefixes->bytes, xen_prefix, 3); + prefixes->bytes[3] = xen_prefix[ARRAY_SIZE(xen_prefix) - 1]; + prefixes->nbytes = ARRAY_SIZE(xen_prefix); + insn->next_byte += prefixes->nbytes; + prefixes->got = 1; + + return 1; + } + +err_out: + return 0; +} +#else +#define insn_xen_prefix(insn,b) (0) +#endif + /** * insn_get_prefixes - scan x86 instruction prefix bytes * @insn: &struct insn containing instruction @@ -79,6 +110,10 @@ void insn_get_prefixes(struct insn *insn) nb = 0; lb = 0; b = peek_next(insn_byte_t, insn); + + if (insn_xen_prefix(insn, b)) + return; + attr = inat_get_opcode_attribute(b); while (inat_is_legacy_prefix(attr)) { /* Skip if same prefix */ -- 2.20.1
[PATCH v4] watchdog: orion_wdt: use timer1 as a pretimeout
The orion watchdog can either reset the CPU or generate an interrupt. The interrupt would be useful for debugging as it provides panic() output about the watchdog expiry, however if the interrupt is used the watchdog can't reset the CPU in the event of being stuck in a loop with interrupts disabled or if the CPU is prevented from accessing memory (e.g. an unterminated DMA). The Armada SoCs have spare timers that aren't currently used by the Linux kernel. We can use timer1 to provide a pre-timeout ahead of the watchdog timer and provide the possibility of gathering debug before the reset triggers. Signed-off-by: Chris Packham --- This was submitted previously[1], the other patches two from the series have been picked up but this one seems to have fallen through the gaps. Changes in v3: - rebase against linux/master Changes in v2: - apply changes to armada-38x only [1] - https://lore.kernel.org/linux-watchdog/20190305201924.14853-4-chris.pack...@alliedtelesis.co.nz/ drivers/watchdog/orion_wdt.c | 59 ++-- 1 file changed, 50 insertions(+), 9 deletions(-) diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c index cdb0d174c5e2..f2e90bfd7186 100644 --- a/drivers/watchdog/orion_wdt.c +++ b/drivers/watchdog/orion_wdt.c @@ -46,6 +46,11 @@ #define WDT_AXP_FIXED_ENABLE_BIT BIT(10) #define WDT_A370_EXPIRED BIT(31) +#define TIMER1_VAL_OFF 0x001c +#define TIMER1_ENABLE_BIT BIT(2) +#define TIMER1_FIXED_ENABLE_BITBIT(12) +#define TIMER1_STATUS_BIT BIT(8) + static bool nowayout = WATCHDOG_NOWAYOUT; static int heartbeat = -1; /* module parameter (seconds) */ @@ -158,6 +163,7 @@ static int armadaxp_wdt_clock_init(struct platform_device *pdev, struct orion_watchdog *dev) { int ret; + u32 val; dev->clk = of_clk_get_by_name(pdev->dev.of_node, "fixed"); if (IS_ERR(dev->clk)) @@ -169,38 +175,48 @@ static int armadaxp_wdt_clock_init(struct platform_device *pdev, } /* Enable the fixed watchdog clock input */ - atomic_io_modify(dev->reg + TIMER_CTRL, -WDT_AXP_FIXED_ENABLE_BIT, -WDT_AXP_FIXED_ENABLE_BIT); + val = WDT_AXP_FIXED_ENABLE_BIT | TIMER1_FIXED_ENABLE_BIT; + atomic_io_modify(dev->reg + TIMER_CTRL, val, val); dev->clk_rate = clk_get_rate(dev->clk); + return 0; } static int orion_wdt_ping(struct watchdog_device *wdt_dev) { struct orion_watchdog *dev = watchdog_get_drvdata(wdt_dev); + /* Reload watchdog duration */ writel(dev->clk_rate * wdt_dev->timeout, dev->reg + dev->data->wdt_counter_offset); + if (dev->wdt.info->options & WDIOF_PRETIMEOUT) + writel(dev->clk_rate * (wdt_dev->timeout - wdt_dev->pretimeout), + dev->reg + TIMER1_VAL_OFF); + return 0; } static int armada375_start(struct watchdog_device *wdt_dev) { struct orion_watchdog *dev = watchdog_get_drvdata(wdt_dev); - u32 reg; + u32 reg, val; /* Set watchdog duration */ writel(dev->clk_rate * wdt_dev->timeout, dev->reg + dev->data->wdt_counter_offset); + if (dev->wdt.info->options & WDIOF_PRETIMEOUT) + writel(dev->clk_rate * (wdt_dev->timeout - wdt_dev->pretimeout), + dev->reg + TIMER1_VAL_OFF); /* Clear the watchdog expiration bit */ atomic_io_modify(dev->reg + TIMER_A370_STATUS, WDT_A370_EXPIRED, 0); /* Enable watchdog timer */ - atomic_io_modify(dev->reg + TIMER_CTRL, dev->data->wdt_enable_bit, - dev->data->wdt_enable_bit); + val = dev->data->wdt_enable_bit; + if (dev->wdt.info->options & WDIOF_PRETIMEOUT) + val |= TIMER1_ENABLE_BIT; + atomic_io_modify(dev->reg + TIMER_CTRL, val, val); /* Enable reset on watchdog */ reg = readl(dev->rstout); @@ -277,7 +293,7 @@ static int orion_stop(struct watchdog_device *wdt_dev) static int armada375_stop(struct watchdog_device *wdt_dev) { struct orion_watchdog *dev = watchdog_get_drvdata(wdt_dev); - u32 reg; + u32 reg, mask; /* Disable reset on watchdog */ atomic_io_modify(dev->rstout_mask, dev->data->rstout_mask_bit, @@ -287,7 +303,10 @@ static int armada375_stop(struct watchdog_device *wdt_dev) writel(reg, dev->rstout); /* Disable watchdog timer */ - atomic_io_modify(dev->reg + TIMER_CTRL, dev->data->wdt_enable_bit, 0); + mask = dev->data->wdt_enable_bit; + if (wdt_dev->info->options & WDIOF_PRETIMEOUT) + mask += TIMER1_ENABLE_BIT; + atomic_io_modify(dev->reg + TIMER_CTRL, mask, 0); return 0; } @@ -349,7 +368,7 @@ static unsigned int orion_wdt_get_timeleft(struct watchdog_device *wdt_dev) return readl(dev->reg + dev->da
Re: [PATCH v6 00/12] implement KASLR for powerpc/fsl_booke/32
On 2019/8/28 12:05, Scott Wood wrote: On Fri, 2019-08-09 at 18:07 +0800, Jason Yan wrote: This series implements KASLR for powerpc/fsl_booke/32, as a security feature that deters exploit attempts relying on knowledge of the location of kernel internals. Since CONFIG_RELOCATABLE has already supported, what we need to do is map or copy kernel to a proper place and relocate. Have you tested this with a kernel that was loaded at a non-zero address? I tried loading a kernel at 0x0400 (by changing the address in the uImage, and setting bootm_low to 0400 in U-Boot), and it works without CONFIG_RANDOMIZE and fails with. Not yet. I will test this kind of cases in the next days. Thank you so much. If there are any other corner cases that have to be tested, please let me know. Freescale Book-E parts expect lowmem to be mapped by fixed TLB entries(TLB1). The TLB1 entries are not suitable to map the kernel directly in a randomized region, so we chose to copy the kernel to a proper place and restart to relocate. Entropy is derived from the banner and timer base, which will change every build and boot. This not so much safe so additionally the bootloader may pass entropy via the /chosen/kaslr-seed node in device tree. How complicated would it be to directly access the HW RNG (if present) that early in the boot? It'd be nice if a U-Boot update weren't required (and particularly concerning that KASLR would appear to work without a U-Boot update, but without decent entropy). -Scott .
Re: [RFC PATCH v2 00/19] RDMA/FS DAX truncate proposal V1,000,002 ;-)
On Mon, Aug 26, 2019 at 03:55:10PM +1000, Dave Chinner wrote: > On Fri, Aug 23, 2019 at 10:08:36PM -0700, Ira Weiny wrote: > > On Sat, Aug 24, 2019 at 10:11:24AM +1000, Dave Chinner wrote: > > > On Fri, Aug 23, 2019 at 09:04:29AM -0300, Jason Gunthorpe wrote: > > > > > > > IMHO it is wrong to try and create a model where the file lease exists > > > > independently from the kernel object relying on it. In other words the > > > > IB MR object itself should hold a reference to the lease it relies > > > > upon to function properly. > > > > > > That still doesn't work. Leases are not individually trackable or > > > reference counted objects objects - they are attached to a struct > > > file bUt, in reality, they are far more restricted than a struct > > > file. > > > > > > That is, a lease specifically tracks the pid and the _open fd_ it > > > was obtained for, so it is essentially owned by a specific process > > > context. Hence a lease is not able to be passed to a separate > > > process context and have it still work correctly for lease break > > > notifications. i.e. the layout break signal gets delivered to > > > original process that created the struct file, if it still exists > > > and has the original fd still open. It does not get sent to the > > > process that currently holds a reference to the IB context. But this is an exclusive layout lease which does not send a signal. There is no way to break it. > > > > > > > The fcntl man page says: > > > > "Leases are associated with an open file description (see open(2)). This > > means > > that duplicate file descriptors (created by, for example, fork(2) or dup(2)) > > refer to the same lease, and this lease may be modified or released using > > any > > of these descriptors. Furthermore, the lease is released by either an > > explicit F_UNLCK operation on any of these duplicate file descriptors, or > > when > > all such file descriptors have been closed." > > Right, the lease is attached to the struct file, so it follows > where-ever the struct file goes. That doesn't mean it's actually > useful when the struct file is duplicated and/or passed to another > process. :/ > > AFAICT, the problem is that when we take another reference to the > struct file, or when the struct file is passed to a different > process, nothing updates the lease or lease state attached to that > struct file. Ok, I probably should have made this more clear in the cover letter but _only_ the process which took the lease can actually pin memory. That pinned memory _can_ be passed to another process but those sub-process' can _not_ use the original lease to pin _more_ of the file. They would need to take their own lease to do that. Sorry for not being clear on that. > > > From this I took it that the child process FD would have the lease as well > > _and_ could release it. I _assumed_ that applied to SCM_RIGHTS but it does > > not > > seem to work the same way as dup() so I'm not so sure. > > Sure, that part works because the struct file is passed. It doesn't > end up with the same fd number in the other process, though. > > The issue is that layout leases need to notify userspace when they > are broken by the kernel, so a lease stores the owner pid/tid in the > file->f_owner field via __f_setown(). It also keeps a struct fasync > attached to the file_lock that records the fd that the lease was > created on. When a signal needs to be sent to userspace for that > lease, we call kill_fasync() and that walks the list of fasync > structures on the lease and calls: > > send_sigio(fown, fa->fa_fd, band); > > And it does for every fasync struct attached to a lease. Yes, a > lease can track multiple fds, but it can only track them in a single > process context. The moment the struct file is shared with another > process, the lease is no longer capable of sending notifications to > all the lease holders. > > Yes, you can change the owning process via F_SETOWNER, but that's > still only a single process context, and you can't change the fd in > the fasync list. You can add new fd to an existing lease by calling > F_SETLEASE on the new fd, but you still only have a single process > owner context for signal delivery. > > As such, leases that require callbacks to userspace are currently > only valid within the process context the lease was taken in. But for long term pins we are not requiring callbacks. > Indeed, even closing the fd the lease was taken on without > F_UNLCKing it first doesn't mean the lease has been torn down if > there is some other reference to the struct file. That means the > original lease owner will still get SIGIO delivered to that fd on a > lease break regardless of whether it is open or not. ANd if we > implement "layout lease not released within SIGIO response timeout" > then that process will get killed, despite the fact it may not even > have a reference to that file anymore. I'm not seeing that as a problem. This is all a result o
RE: [PATCH v2 08/10] PCI: layerscape: Add EP mode support for ls1088a and ls2088a
> -Original Message- > From: Andrew Murray > Sent: 2019年8月28日 17:01 > To: Xiaowei Bao > Cc: bhelg...@google.com; robh...@kernel.org; mark.rutl...@arm.com; > shawn...@kernel.org; Leo Li ; kis...@ti.com; > lorenzo.pieral...@arm.co; a...@arndb.de; gre...@linuxfoundation.org; M.h. > Lian ; Mingkai Hu ; Roy > Zang ; jingooh...@gmail.com; > gustavo.pimen...@synopsys.com; linux-...@vger.kernel.org; > devicet...@vger.kernel.org; linux-kernel@vger.kernel.org; > linux-arm-ker...@lists.infradead.org; linuxppc-...@lists.ozlabs.org > Subject: Re: [PATCH v2 08/10] PCI: layerscape: Add EP mode support for > ls1088a and ls2088a > > On Wed, Aug 28, 2019 at 04:29:32AM +, Xiaowei Bao wrote: > > > > > > > -Original Message- > > > From: Andrew Murray > > > Sent: 2019年8月27日 21:34 > > > To: Xiaowei Bao > > > Cc: bhelg...@google.com; robh...@kernel.org; mark.rutl...@arm.com; > > > shawn...@kernel.org; Leo Li ; kis...@ti.com; > > > lorenzo.pieral...@arm.co; a...@arndb.de; gre...@linuxfoundation.org; > M.h. > > > Lian ; Mingkai Hu ; Roy > > > Zang ; jingooh...@gmail.com; > > > gustavo.pimen...@synopsys.com; linux-...@vger.kernel.org; > > > devicet...@vger.kernel.org; linux-kernel@vger.kernel.org; > > > linux-arm-ker...@lists.infradead.org; linuxppc-...@lists.ozlabs.org > > > Subject: Re: [PATCH v2 08/10] PCI: layerscape: Add EP mode support > > > for ls1088a and ls2088a > > > > > > On Mon, Aug 26, 2019 at 09:49:35AM +, Xiaowei Bao wrote: > > > > > > > > > > > > > -Original Message- > > > > > From: Andrew Murray > > > > > Sent: 2019年8月23日 22:28 > > > > > To: Xiaowei Bao > > > > > Cc: bhelg...@google.com; robh...@kernel.org; > > > > > mark.rutl...@arm.com; shawn...@kernel.org; Leo Li > > > > > ; kis...@ti.com; lorenzo.pieral...@arm.co; > > > > > a...@arndb.de; gre...@linuxfoundation.org; > > > M.h. > > > > > Lian ; Mingkai Hu ; > > > > > Roy Zang ; jingooh...@gmail.com; > > > > > gustavo.pimen...@synopsys.com; linux-...@vger.kernel.org; > > > > > devicet...@vger.kernel.org; linux-kernel@vger.kernel.org; > > > > > linux-arm-ker...@lists.infradead.org; > > > > > linuxppc-...@lists.ozlabs.org > > > > > Subject: Re: [PATCH v2 08/10] PCI: layerscape: Add EP mode > > > > > support for ls1088a and ls2088a > > > > > > > > > > On Thu, Aug 22, 2019 at 07:22:40PM +0800, Xiaowei Bao wrote: > > > > > > Add PCIe EP mode support for ls1088a and ls2088a, there are > > > > > > some difference between LS1 and LS2 platform, so refactor the > > > > > > code of the EP driver. > > > > > > > > > > > > Signed-off-by: Xiaowei Bao > > > > > > --- > > > > > > v2: > > > > > > - New mechanism for layerscape EP driver. > > > > > > > > > > Was there a v1 of this patch? > > > > > > > > > > > > > > > > > drivers/pci/controller/dwc/pci-layerscape-ep.c | 76 > > > > > > -- > > > > > > 1 file changed, 58 insertions(+), 18 deletions(-) > > > > > > > > > > > > diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c > > > > > > b/drivers/pci/controller/dwc/pci-layerscape-ep.c > > > > > > index 7ca5fe8..2a66f07 100644 > > > > > > --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c > > > > > > +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c > > > > > > @@ -20,27 +20,29 @@ > > > > > > > > > > > > #define PCIE_DBI2_OFFSET 0x1000 /* DBI2 base address*/ > > > > > > > > > > > > -struct ls_pcie_ep { > > > > > > - struct dw_pcie *pci; > > > > > > - struct pci_epc_features *ls_epc; > > > > > > +#define to_ls_pcie_ep(x) dev_get_drvdata((x)->dev) > > > > > > + > > > > > > +struct ls_pcie_ep_drvdata { > > > > > > + u32 func_offset; > > > > > > + const struct dw_pcie_ep_ops *ops; > > > > > > + const struct dw_pcie_ops*dw_pcie_ops; > > > > > > }; > > > > > > > > > > > > -#define to_ls_pcie_ep(x) dev_get_drvdata((x)->dev) > > > > > > +struct ls_pcie_ep { > > > > > > + struct dw_pcie *pci; > > > > > > + struct pci_epc_features *ls_epc; > > > > > > + const struct ls_pcie_ep_drvdata *drvdata; }; > > > > > > > > > > > > static int ls_pcie_establish_link(struct dw_pcie *pci) { > > > > > > return 0; > > > > > > } > > > > > > > > > > > > -static const struct dw_pcie_ops ls_pcie_ep_ops = { > > > > > > +static const struct dw_pcie_ops dw_ls_pcie_ep_ops = { > > > > > > .start_link = ls_pcie_establish_link, }; > > > > > > > > > > > > -static const struct of_device_id ls_pcie_ep_of_match[] = { > > > > > > - { .compatible = "fsl,ls-pcie-ep",}, > > > > > > - { }, > > > > > > -}; > > > > > > - > > > > > > static const struct pci_epc_features* > > > > > > ls_pcie_ep_get_features(struct dw_pcie_ep *ep) { @@ -82,10 > > > > > > +84,44 @@ static int ls_pcie_ep_raise_irq(struct dw_pcie_ep *ep, > u8 func_no, > > > > > > } > > > > > > } > > > > > > > > > > > > -static const struct dw_pcie_ep_ops pcie_ep_ops = { > > > > > > +static unsigned int ls_pcie_ep_func_conf_select(struct > > > > > > +dw_pcie_ep > >
Re: [PATCH 3/4] KVM: selftests: Introduce VM_MODE_PXXV48_4K
On Wed, Aug 28, 2019 at 01:41:04PM +0200, Andrew Jones wrote: [...] > > +#define DEBUG printf > > If this is going to be some general thing, then maybe we should do it > like this > > #ifndef NDEBUG > #define dprintf printf > #endif Ok. > > > > + > > /* Minimum allocated guest virtual and physical addresses */ > > #define KVM_UTIL_MIN_VADDR 0x2000 > > > > @@ -38,12 +40,19 @@ enum vm_guest_mode { > > VM_MODE_P48V48_64K, > > VM_MODE_P40V48_4K, > > VM_MODE_P40V48_64K, > > + VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ > > NUM_VM_MODES, > > }; > > > > #ifdef __aarch64__ > > #define VM_MODE_DEFAULT VM_MODE_P40V48_4K > > -#else > > +#endif > > + > > +#ifdef __x86_64__ > > +#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K > > +#endif > > + > > +#ifndef VM_MODE_DEFAULT > > #define VM_MODE_DEFAULT VM_MODE_P52V48_4K > > #endif > > nit: how about > > #if defined(__aarch64__) > ... > #elif defined(__x86_64__) > ... > #else > ... > #endif Yes, better! > > > > > diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c > > b/tools/testing/selftests/kvm/lib/aarch64/processor.c > > index 486400a97374..86036a59a668 100644 > > --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c > > +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c > > @@ -264,6 +264,9 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, > > struct kvm_vcpu_init *ini > > case VM_MODE_P52V48_4K: > > TEST_ASSERT(false, "AArch64 does not support 4K sized pages " > >"with 52-bit physical address ranges"); > > + case VM_MODE_PXXV48_4K: > > + TEST_ASSERT(false, "AArch64 does not support 4K sized pages " > > + "with ANY-bit physical address ranges"); > > case VM_MODE_P52V48_64K: > > tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ > > tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ > > diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c > > b/tools/testing/selftests/kvm/lib/kvm_util.c > > index 0c7c4368bc14..8c6f872a8793 100644 > > --- a/tools/testing/selftests/kvm/lib/kvm_util.c > > +++ b/tools/testing/selftests/kvm/lib/kvm_util.c > > @@ -8,6 +8,7 @@ > > #include "test_util.h" > > #include "kvm_util.h" > > #include "kvm_util_internal.h" > > +#include "processor.h" > > > > #include > > #include > > @@ -101,12 +102,13 @@ static void vm_open(struct kvm_vm *vm, int perm) > > } > > > > const char * const vm_guest_mode_string[] = { > > - "PA-bits:52, VA-bits:48, 4K pages", > > - "PA-bits:52, VA-bits:48, 64K pages", > > - "PA-bits:48, VA-bits:48, 4K pages", > > - "PA-bits:48, VA-bits:48, 64K pages", > > - "PA-bits:40, VA-bits:48, 4K pages", > > - "PA-bits:40, VA-bits:48, 64K pages", > > + "PA-bits:52, VA-bits:48, 4K pages", > > + "PA-bits:52, VA-bits:48, 64K pages", > > + "PA-bits:48, VA-bits:48, 4K pages", > > + "PA-bits:48, VA-bits:48, 64K pages", > > + "PA-bits:40, VA-bits:48, 4K pages", > > + "PA-bits:40, VA-bits:48, 64K pages", > > + "PA-bits:ANY, VA-bits:48, 4K pages", > > nit: while formatting we could align the 'K's in the 64/4K column Ok. > > > }; > > _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES, > >"Missing new mode strings?"); > > @@ -185,6 +187,32 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, > > uint64_t phy_pages, > > vm->page_size = 0x1; > > vm->page_shift = 16; > > break; > > + case VM_MODE_PXXV48_4K: > > +#ifdef __x86_64__ > > + { > > + struct kvm_cpuid_entry2 *entry; > > + > > + /* SDM 4.1.4 */ > > + entry = kvm_get_supported_cpuid_entry(0x8008); > > + if (entry) { > > + vm->pa_bits = entry->eax & 0xff; > > + vm->va_bits = (entry->eax >> 8) & 0xff; > > + } else { > > + vm->pa_bits = vm->va_bits = 32; > > + } > > + TEST_ASSERT(vm->va_bits == 48, "Linear address width " > > + "(%d bits) not supported", vm->va_bits); > > + vm->pgtable_levels = 4; > > + vm->page_size = 0x1000; > > + vm->page_shift = 12; > > + DEBUG("Guest physical address width detected: %d\n", > > + vm->pa_bits); > > + } > > How about making this a function that lives in x86_64/processor.c? Done. Also I fixed up the overlooked PAE calculation which should be 36 rather than 32 bits PA. > > > +#else > > + TEST_ASSERT(false, "VM_MODE_PXXV48_4K not supported on " > > + "non-x86 platforms"); > > This should make the above aarch64_vcpu_setup() change unnecessary. I tend to prefer keeping both because if some non-arm arch removed it then aarch64 has another chance to assert. Tha
Re: [PATCH 4/4] KVM: selftests: Remove duplicate guest mode handling
On Wed, Aug 28, 2019 at 01:46:13PM +0200, Andrew Jones wrote: [...] > > +unsigned int vm_get_page_size(struct kvm_vm *vm) > > +{ > > + return vm->page_size; > > +} > > + > > +unsigned int vm_get_page_shift(struct kvm_vm *vm) > > +{ > > + return vm->page_shift; > > +} > > We could get by with just one of the above two, but whatever Right... and imho if we export kvm_vm struct we don't even any helpers. :) But I didn't touch that. > > + > > +unsigned int vm_get_max_gfn(struct kvm_vm *vm) > > +{ > > + return vm->max_gfn; > > +} > > -- > > 2.21.0 > > > > Reviewed-by: Andrew Jones Thanks! -- Peter Xu
[PATCH v2 2/4] KVM: selftests: Create VM earlier for dirty log test
Since we've just removed the dependency of vm type in previous patch, now we can create the vm much earlier. Note that to move it earlier we used an approximation of number of extra pages but it should be fine. This prepares for the follow up patches to finally remove the duplication of guest mode parsings. Reviewed-by: Andrew Jones Signed-off-by: Peter Xu --- tools/testing/selftests/kvm/dirty_log_test.c | 19 --- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 135cba5c6d0d..efb7746a7e99 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -230,6 +230,9 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, return vm; } +#define DIRTY_MEM_BITS 30 /* 1G */ +#define PAGE_SHIFT_4K 12 + static void run_test(enum vm_guest_mode mode, unsigned long iterations, unsigned long interval, uint64_t phys_offset) { @@ -239,6 +242,18 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, uint64_t max_gfn; unsigned long *bmap; + /* +* We reserve page table for 2 times of extra dirty mem which +* will definitely cover the original (1G+) test range. Here +* we do the calculation with 4K page size which is the +* smallest so the page number will be enough for all archs +* (e.g., 64K page size guest will need even less memory for +* page tables). +*/ + vm = create_vm(mode, VCPU_ID, + 2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K), + guest_code); + switch (mode) { case VM_MODE_P52V48_4K: guest_pa_bits = 52; @@ -285,7 +300,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, * A little more than 1G of guest page sized pages. Cover the * case where the size is not aligned to 64 pages. */ - guest_num_pages = (1ul << (30 - guest_page_shift)) + 16; + guest_num_pages = (1ul << (DIRTY_MEM_BITS - guest_page_shift)) + 16; host_page_size = getpagesize(); host_num_pages = (guest_num_pages * guest_page_size) / host_page_size + !!((guest_num_pages * guest_page_size) % host_page_size); @@ -302,8 +317,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, bmap = bitmap_alloc(host_num_pages); host_bmap_track = bitmap_alloc(host_num_pages); - vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code); - #ifdef USE_CLEAR_DIRTY_LOG struct kvm_enable_cap cap = {}; -- 2.21.0
[PATCH v2 1/4] KVM: selftests: Move vm type into _vm_create() internally
Rather than passing the vm type from the top level to the end of vm creation, let's simply keep that as an internal of kvm_vm struct and decide the type in _vm_create(). Several reasons for doing this: - The vm type is only decided by physical address width and currently only used in aarch64, so we've got enough information as long as we're passing vm_guest_mode into _vm_create(), - This removes a loop dependency between the vm->type and creation of vms. That's why now we need to parse vm_guest_mode twice sometimes, once in run_test() and then again in _vm_create(). The follow up patches will move on to clean up that as well so we can have a single place to decide guest machine types and so. Note that this patch will slightly change the behavior of aarch64 tests in that previously most vm_create() callers will directly pass in type==0 into _vm_create() but now the type will depend on vm_guest_mode, however it shouldn't affect any user because all vm_create() users of aarch64 will be using VM_MODE_DEFAULT guest mode (which is VM_MODE_P40V48_4K) so at last type will still be zero. Signed-off-by: Peter Xu --- tools/testing/selftests/kvm/dirty_log_test.c | 13 +++- .../testing/selftests/kvm/include/kvm_util.h | 3 +-- tools/testing/selftests/kvm/lib/kvm_util.c| 21 --- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index ceb52b952637..135cba5c6d0d 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -216,14 +216,12 @@ static void vm_dirty_log_verify(unsigned long *bmap) } static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, - uint64_t extra_mem_pages, void *guest_code, - unsigned long type) + uint64_t extra_mem_pages, void *guest_code) { struct kvm_vm *vm; uint64_t extra_pg_pages = extra_mem_pages / 512 * 2; - vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, - O_RDWR, type); + vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); #ifdef __x86_64__ vm_create_irqchip(vm); @@ -240,7 +238,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, struct kvm_vm *vm; uint64_t max_gfn; unsigned long *bmap; - unsigned long type = 0; switch (mode) { case VM_MODE_P52V48_4K: @@ -281,10 +278,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, * bits we can change to 39. */ guest_pa_bits = 39; -#endif -#ifdef __aarch64__ - if (guest_pa_bits != 40) - type = KVM_VM_TYPE_ARM_IPA_SIZE(guest_pa_bits); #endif max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1; guest_page_size = (1ul << guest_page_shift); @@ -309,7 +302,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, bmap = bitmap_alloc(host_num_pages); host_bmap_track = bitmap_alloc(host_num_pages); - vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code, type); + vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code); #ifdef USE_CLEAR_DIRTY_LOG struct kvm_enable_cap cap = {}; diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index e0e66b115ef2..c78faa2ff7f3 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -60,8 +60,7 @@ int kvm_check_cap(long cap); int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); -struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, - int perm, unsigned long type); +struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); void kvm_vm_free(struct kvm_vm *vmp); void kvm_vm_restart(struct kvm_vm *vmp, int perm); void kvm_vm_release(struct kvm_vm *vmp); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 6e49bb039376..34a8a6572c7c 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -84,7 +84,7 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap) return ret; } -static void vm_open(struct kvm_vm *vm, int perm, unsigned long type) +static void vm_open(struct kvm_vm *vm, int perm) { vm->kvm_fd = open(KVM_DEV_PATH, perm); if (vm->kvm_fd < 0) @@ -95,7 +95,7 @@ static void vm_open(struct kvm_vm *vm, int perm, unsigned long type) exit(KSFT_SKIP); } - vm->fd = ioctl(vm->kv
[PATCH v2 0/4] KVM: selftests: Introduce VM_MODE_PXXV48_4K
v2: - pick r-bs - rebased to master - fix pa width detect, check cpuid(1):edx.PAE(bit 6) - fix arm compilation issue [Drew] - fix indents issues and ways to define macros [Drew] - provide functions for fetching cpu pa/va bits [Drew] This series originates from "[PATCH] KVM: selftests: Detect max PA width from cpuid" [1] and one of Drew's comments - instead of keeping the hackish line to overwrite guest_pa_bits all the time, this series introduced the new mode VM_MODE_PXXV48_4K for x86_64 platform. The major issue is that even all the x86_64 kvm selftests are currently using the guest mode VM_MODE_P52V48_4K, many x86_64 hosts are not using 52 bits PA (and in most cases, far less). If with luck we could be having 48 bits hosts, but it's more adhoc (I've observed 3 x86_64 systems, they are having different PA width of 36, 39, 48). I am not sure whether this is happening to the other archs as well, but it probably makes sense to bring the x86_64 tests to the real world on always using the correct PA bits. A side effect of this series is that it will also fix the crash we've encountered on Xeon E3-1220 as mentioned [1] due to the differenciation of PA width. With [1], we've observed AMD host issues when with NPT=off. However a funny fact is that after I reworked into this series, the tests can instead pass on both NPT=on/off. It could be that the series changes vm->pa_bits or other fields so something was affected. I didn't dig more on that though, considering we should not lose anything. [1] https://lkml.org/lkml/2019/8/26/141 Peter Xu (4): KVM: selftests: Move vm type into _vm_create() internally KVM: selftests: Create VM earlier for dirty log test KVM: selftests: Introduce VM_MODE_PXXV48_4K KVM: selftests: Remove duplicate guest mode handling tools/testing/selftests/kvm/dirty_log_test.c | 79 +-- .../testing/selftests/kvm/include/kvm_util.h | 16 +++- .../selftests/kvm/include/x86_64/processor.h | 3 + .../selftests/kvm/lib/aarch64/processor.c | 3 + tools/testing/selftests/kvm/lib/kvm_util.c| 67 .../selftests/kvm/lib/x86_64/processor.c | 30 ++- 6 files changed, 119 insertions(+), 79 deletions(-) -- 2.21.0
[PATCH v2 4/4] KVM: selftests: Remove duplicate guest mode handling
Remove the duplication code in run_test() of dirty_log_test because after some reordering of functions now we can directly use the outcome of vm_create(). Meanwhile, with the new VM_MODE_PXXV48_4K, we can safely revert b442324b58 too where we stick the x86_64 PA width to 39 bits for dirty_log_test. Reviewed-by: Andrew Jones Signed-off-by: Peter Xu --- tools/testing/selftests/kvm/dirty_log_test.c | 52 ++- .../testing/selftests/kvm/include/kvm_util.h | 4 ++ tools/testing/selftests/kvm/lib/kvm_util.c| 17 ++ 3 files changed, 26 insertions(+), 47 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index c86f83cb33e5..89fac11733a5 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -234,10 +234,8 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, static void run_test(enum vm_guest_mode mode, unsigned long iterations, unsigned long interval, uint64_t phys_offset) { - unsigned int guest_pa_bits, guest_page_shift; pthread_t vcpu_thread; struct kvm_vm *vm; - uint64_t max_gfn; unsigned long *bmap; /* @@ -252,60 +250,20 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, 2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K), guest_code); - switch (mode) { - case VM_MODE_P52V48_4K: - case VM_MODE_PXXV48_4K: - guest_pa_bits = 52; - guest_page_shift = 12; - break; - case VM_MODE_P52V48_64K: - guest_pa_bits = 52; - guest_page_shift = 16; - break; - case VM_MODE_P48V48_4K: - guest_pa_bits = 48; - guest_page_shift = 12; - break; - case VM_MODE_P48V48_64K: - guest_pa_bits = 48; - guest_page_shift = 16; - break; - case VM_MODE_P40V48_4K: - guest_pa_bits = 40; - guest_page_shift = 12; - break; - case VM_MODE_P40V48_64K: - guest_pa_bits = 40; - guest_page_shift = 16; - break; - default: - TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); - } - - DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - -#ifdef __x86_64__ - /* -* FIXME -* The x86_64 kvm selftests framework currently only supports a -* single PML4 which restricts the number of physical address -* bits we can change to 39. -*/ - guest_pa_bits = 39; -#endif - max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1; - guest_page_size = (1ul << guest_page_shift); + guest_page_size = vm_get_page_size(vm); /* * A little more than 1G of guest page sized pages. Cover the * case where the size is not aligned to 64 pages. */ - guest_num_pages = (1ul << (DIRTY_MEM_BITS - guest_page_shift)) + 16; + guest_num_pages = (1ul << (DIRTY_MEM_BITS - + vm_get_page_shift(vm))) + 16; host_page_size = getpagesize(); host_num_pages = (guest_num_pages * guest_page_size) / host_page_size + !!((guest_num_pages * guest_page_size) % host_page_size); if (!phys_offset) { - guest_test_phys_mem = (max_gfn - guest_num_pages) * guest_page_size; + guest_test_phys_mem = (vm_get_max_gfn(vm) - + guest_num_pages) * guest_page_size; guest_test_phys_mem &= ~(host_page_size - 1); } else { guest_test_phys_mem = phys_offset; diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 430edbacb9b2..070e3ba193a6 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -152,6 +152,10 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); bool vm_is_unrestricted_guest(struct kvm_vm *vm); +unsigned int vm_get_page_size(struct kvm_vm *vm); +unsigned int vm_get_page_shift(struct kvm_vm *vm); +unsigned int vm_get_max_gfn(struct kvm_vm *vm); + struct kvm_userspace_memory_region * kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index bb8f993b25fb..80a338b5403c 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -136,6 +136,8 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) { struct kvm_vm *vm; + DEBUG("Testing guest mode: %s\n", vm_guest_
[PATCH v2 3/4] KVM: selftests: Introduce VM_MODE_PXXV48_4K
The naming VM_MODE_P52V48_4K is explicit but unclear when used on x86_64 machines, because x86_64 machines are having various physical address width rather than some static values. Here's some examples: - Intel Xeon E3-1220: 36 bits - Intel Core i7-8650: 39 bits - AMD EPYC 7251: 48 bits All of them are using 48 bits linear address width but with totally different physical address width (and most of the old machines should be less than 52 bits). Let's create a new guest mode called VM_MODE_PXXV48_4K for current x86_64 tests and make it as the default to replace the old naming of VM_MODE_P52V48_4K because it shows more clearly that the PA width is not really a constant. Meanwhile we also stop assuming all the x86 machines are having 52 bits PA width but instead we fetch the real vm->pa_bits from CPUID 0x8008 during runtime. We currently make this exclusively used by x86_64 but no other arch. As a slight touch up, moving DEBUG macro from dirty_log_test.c to kvm_util.h so lib can use it too. Signed-off-by: Peter Xu --- tools/testing/selftests/kvm/dirty_log_test.c | 5 ++-- .../testing/selftests/kvm/include/kvm_util.h | 9 +- .../selftests/kvm/include/x86_64/processor.h | 3 ++ .../selftests/kvm/lib/aarch64/processor.c | 3 ++ tools/testing/selftests/kvm/lib/kvm_util.c| 29 ++ .../selftests/kvm/lib/x86_64/processor.c | 30 --- 6 files changed, 65 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index efb7746a7e99..c86f83cb33e5 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -19,8 +19,6 @@ #include "kvm_util.h" #include "processor.h" -#define DEBUG printf - #define VCPU_ID1 /* The memory slot index to track dirty pages */ @@ -256,6 +254,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, switch (mode) { case VM_MODE_P52V48_4K: + case VM_MODE_PXXV48_4K: guest_pa_bits = 52; guest_page_shift = 12; break; @@ -446,7 +445,7 @@ int main(int argc, char *argv[]) #endif #ifdef __x86_64__ - vm_guest_mode_params_init(VM_MODE_P52V48_4K, true, true); + vm_guest_mode_params_init(VM_MODE_PXXV48_4K, true, true); #endif #ifdef __aarch64__ vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index c78faa2ff7f3..430edbacb9b2 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -24,6 +24,10 @@ struct kvm_vm; typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ +#ifndef DEBUG +#define DEBUG printf +#endif + /* Minimum allocated guest virtual and physical addresses */ #define KVM_UTIL_MIN_VADDR 0x2000 @@ -38,11 +42,14 @@ enum vm_guest_mode { VM_MODE_P48V48_64K, VM_MODE_P40V48_4K, VM_MODE_P40V48_64K, + VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ NUM_VM_MODES, }; -#ifdef __aarch64__ +#if defined(__aarch64__) #define VM_MODE_DEFAULT VM_MODE_P40V48_4K +#elif defined(__x86_64__) +#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K #else #define VM_MODE_DEFAULT VM_MODE_P52V48_4K #endif diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 80d19740d2dc..0c17f2ee685e 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -325,6 +325,9 @@ uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index); void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, uint64_t msr_value); +uint32_t kvm_get_cpuid_max(void); +void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); + /* * Basic CPU control in CR0 */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 486400a97374..86036a59a668 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -264,6 +264,9 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini case VM_MODE_P52V48_4K: TEST_ASSERT(false, "AArch64 does not support 4K sized pages " "with 52-bit physical address ranges"); + case VM_MODE_PXXV48_4K: + TEST_ASSERT(false, "AArch64 does not support 4K sized pages " + "with ANY-bit physical address ranges"); case VM_MODE_P52V48_64K:
[v5] rtc: pcf85363/pcf85263: fix error that failed to run hwclock -w
Issue: - # hwclock -w hwclock: RTC_SET_TIME: Invalid argument Why: - Relative commit: 8b9f9d4dc511309918c4f6793bae7387c0c638af, this patch will always check for unwritable registers, it will compare reg with max_register in regmap_writeable. - The pcf85363/pcf85263 has the capability of address wrapping which means if you access an address outside the allowed range (0x00-0x2f) hardware actually wraps the access to a lower address. The rtc-pcf85363 driver will use this feature to configure the time and execute 2 actions in the same i2c write operation (stopping the clock and configure the time). However the driver has also configured the `regmap maxregister` protection mechanism that will block accessing addresses outside valid range (0x00-0x2f). How: - Split of writing regs to two parts, first part writes control registers about stop_enable and resets, second part writes RTC time and date registers. Signed-off-by: Biwen Li --- Change in v5: - drop robust explanation Change in v4: - use old scheme - replace link to lkml.org with commit - add proper explanation Change in v3: - replace old scheme with new scheme: increase max_register. Change in v2: - add Why and How into commit message. drivers/rtc/rtc-pcf85363.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-pcf85363.c b/drivers/rtc/rtc-pcf85363.c index a075e77617dc..3450d615974d 100644 --- a/drivers/rtc/rtc-pcf85363.c +++ b/drivers/rtc/rtc-pcf85363.c @@ -166,7 +166,12 @@ static int pcf85363_rtc_set_time(struct device *dev, struct rtc_time *tm) buf[DT_YEARS] = bin2bcd(tm->tm_year % 100); ret = regmap_bulk_write(pcf85363->regmap, CTRL_STOP_EN, - tmp, sizeof(tmp)); + tmp, 2); + if (ret) + return ret; + + ret = regmap_bulk_write(pcf85363->regmap, DT_100THS, + buf, sizeof(tmp) - 2); if (ret) return ret; -- 2.17.1
Re: mmotm 2019-08-27-20-39 uploaded (sound/hda/intel-nhlt.c)
On 8/28/19 3:59 PM, Randy Dunlap wrote: > On 8/28/19 3:45 PM, Pierre-Louis Bossart wrote: >> >>>>> I just checked with Mark Brown's for-next tree >>>>> 8aceffa09b4b9867153bfe0ff6f40517240cee12 >>>>> and things are fine in i386 mode, see below. >>>>> >>>>> next-20190828 also works fine for me in i386 mode. >>>>> >>>>> if you can point me to a tree and configuration that don't work I'll look >>>>> into this, I'd need more info to progress. >>>> >>>> Please try the attached randconfig file. >>>> >>>> Thanks for looking. >>> >>> Ack, I see some errors as well with this config. Likely a missing >>> dependency somewhere, working on this now. >> >> My bad, I added a fallback with static inline functions in the .h file when >> ACPI is not defined, but the .c file was still compiled. >> >> The diff below makes next-20190828 compile with Randy's config. >> >> It looks like the alsa-devel server is down btw? >> >> diff --git a/sound/hda/Makefile b/sound/hda/Makefile >> index 8560f6ef1b19..b3af071ce06b 100644 >> --- a/sound/hda/Makefile >> +++ b/sound/hda/Makefile >> @@ -14,5 +14,7 @@ obj-$(CONFIG_SND_HDA_CORE) += snd-hda-core.o >> #extended hda >> obj-$(CONFIG_SND_HDA_EXT_CORE) += ext/ >> >> +ifdef CONFIG_ACPI >> snd-intel-nhlt-objs := intel-nhlt.o >> obj-$(CONFIG_SND_INTEL_NHLT) += snd-intel-nhlt.o >> +endif >> > > works for me. Thanks. > Acked-by: Randy Dunlap # build-tested > although this Makefile change should not be needed and the dependencies should be handled correctly in Kconfig files. -- ~Randy
Re: [PATCH v6 00/12] implement KASLR for powerpc/fsl_booke/32
On 2019/8/28 12:59, Scott Wood wrote: On Tue, 2019-08-27 at 23:05 -0500, Scott Wood wrote: On Fri, 2019-08-09 at 18:07 +0800, Jason Yan wrote: Freescale Book-E parts expect lowmem to be mapped by fixed TLB entries(TLB1). The TLB1 entries are not suitable to map the kernel directly in a randomized region, so we chose to copy the kernel to a proper place and restart to relocate. Entropy is derived from the banner and timer base, which will change every build and boot. This not so much safe so additionally the bootloader may pass entropy via the /chosen/kaslr-seed node in device tree. How complicated would it be to directly access the HW RNG (if present) that early in the boot? It'd be nice if a U-Boot update weren't required (and particularly concerning that KASLR would appear to work without a U-Boot update, but without decent entropy). OK, I see that kaslr-seed is used on some other platforms, though arm64 aborts KASLR if it doesn't get a seed. I'm not sure if that's better than a loud warning message (or if it was a conscious choice rather than just not having an alternative implemented), but silently using poor entropy for something like this seems bad. It can still make the attacker's cost higher with not so good entropy. The same strategy exists in X86 when X86 KASLR uses RDTSC if without X86_FEATURE_RDRAND supported. I agree that having a warning message looks better for reminding people in this situation. -Scott .
RE: [PATCH] arm: xen: mm: use __GPF_DMA32 for arm64
Hi Stefano, > Subject: RE: [PATCH] arm: xen: mm: use __GPF_DMA32 for arm64 > > On Wed, 28 Aug 2019, Peng Fan wrote: > > Hi Robin, > > > > > Subject: Re: [PATCH] arm: xen: mm: use __GPF_DMA32 for arm64 > > > > > > On 09/07/2019 09:22, Peng Fan wrote: > > > > arm64 shares some code under arch/arm/xen, including mm.c. > > > > However ZONE_DMA is removed by commit > > > > ad67f5a6545("arm64: replace ZONE_DMA with ZONE_DMA32"). > > > > So to ARM64, need use __GFP_DMA32. > > Hi Peng, > > Sorry for being so late in replying, this email got lost in the noise. > > > > > > Signed-off-by: Peng Fan > > > > --- > > > > arch/arm/xen/mm.c | 2 +- > > > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > > > > > diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index > > > > e1d44b903dfc..a95e76d18bf9 100644 > > > > --- a/arch/arm/xen/mm.c > > > > +++ b/arch/arm/xen/mm.c > > > > @@ -27,7 +27,7 @@ unsigned long > > > > xen_get_swiotlb_free_pages(unsigned > > > > int order) > > > > > > > > for_each_memblock(memory, reg) { > > > > if (reg->base < (phys_addr_t)0x) { > > > > - flags |= __GFP_DMA; > > > > + flags |= __GFP_DMA | __GFP_DMA32; > > > > > > Given the definition of GFP_ZONE_BAD, I'm not sure this combination > > > of flags is strictly valid, but rather is implicitly reliant on only > > > one of those zones ever actually existing. As such, it seems liable > > > to blow up if the plans to add ZONE_DMA to arm64[1] go ahead. > > > > How about this, or do you have any suggestions? > > diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index > > d33b77e9add3..f61c29a4430f 100644 > > --- a/arch/arm/xen/mm.c > > +++ b/arch/arm/xen/mm.c > > @@ -28,7 +28,11 @@ unsigned long xen_get_swiotlb_free_pages(unsigned > > int order) > > > > for_each_memblock(memory, reg) { > > if (reg->base < (phys_addr_t)0x) { > > +#ifdef CONFIG_ARM64 > > + flags |= __GFP_DMA32; #else > > flags |= __GFP_DMA; > > +#endif > > break; > > } > > } > > Yes I think this is the way to go, but we are trying not to add any #ifdef > CONFIG_ARM64 under arch/arm. Maybe you could introduce a static inline > function to set GFP_DMA: > > static inline void xen_set_gfp_dma(gfp_t *flags) > > it could be implemented under arch/arm/include/asm/xen/page.h for arm > and under arch/arm64/include/asm/xen/page.h for arm64 using __GFP_DMA > for the former and __GFP_DMA32 for the latter. Thanks for your suggestion. I'll use this in V2. Thanks, Peng.
[PATCH] amd-xgbe: Fix error path in xgbe_mod_init()
In xgbe_mod_init(), we should do cleanup if some error occurs Reported-by: Hulk Robot Fixes: efbaa828330a ("amd-xgbe: Add support to handle device renaming") Fixes: 47f164deab22 ("amd-xgbe: Add PCI device support") Signed-off-by: YueHaibing --- drivers/net/ethernet/amd/xgbe/xgbe-main.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c index b41f236..7ce9c69 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c @@ -469,13 +469,19 @@ static int __init xgbe_mod_init(void) ret = xgbe_platform_init(); if (ret) - return ret; + goto err_platform_init; ret = xgbe_pci_init(); if (ret) - return ret; + goto err_pci_init; return 0; + +err_pci_init: + xgbe_platform_exit(); +err_platform_init: + unregister_netdevice_notifier(&xgbe_netdev_notifier); + return ret; } static void __exit xgbe_mod_exit(void) -- 1.8.3.1
RE: [PATCH V3 1/5] thermal: qoriq: Add clock operations
Hi, Rui > > On Wed, 2019-08-28 at 08:51 +, Anson Huang wrote: > > > Hi, Rui > > > > > > > On Tue, 2019-08-27 at 12:41 +, Leonard Crestez wrote: > > > > > On 27.08.2019 04:51, Anson Huang wrote: > > > > > > > In an earlier series the CLK_IS_CRITICAL flags was removed > > > > > > > from the TMU clock so if the thermal driver doesn't > > > > > > > explicitly enable it the system will hang on probe. This is > > > > > > > what happens in linux-next right now! > > > > > > > > > > > > The thermal driver should be built with module, so default > > > > > > kernel should can boot up, do you modify the thermal driver as > > > > > > built- in? > > > > > > > > > > > > > Unless this patches is merged soon we'll end up with a 5.4- > > > > > > > rc1 > > > > > > > that doesn't boot on imx8mq. An easy fix would be to > > > > > > > drop/revert commit > > > > > > > 951c1aef9691 ("clk: imx8mq: Remove CLK_IS_CRITICAL flag for > > > > > > > IMX8MQ_CLK_TMU_ROOT") until the thermal patches are > accepted. > > > > > > > > > > > > If the thermal driver is built as module, I think no need to > > > > > > revert the commit, but if by default thermal driver is > > > > > > built-in or mod probed, then yes, it should NOT break kernel boot > up. > > > > > > > > > > The qoriq_thermal driver is built as a module in defconfig and > > > > > when modules are properly installed in rootfs they will be > > > > > automatically be probed on boot and cause a hang. > > > > > > > > > > I usually run nfsroot with modules: > > > > > > > > > > make modules_install INSTALL_MOD_PATH=/srv/nfs/imx8-root > > > > > > > > so we need this patch shipped in the beginning of the merge > > > > window, right? > > > > if there is hard dependency between patches, it's better to send > > > > them in one series, and get shipped via either tree. > > > > > > There is no hard dependency in this patch series. Previous for the > > > TMU clock disabled patch, since thermal driver is built as module so > > > I did NOT found the issue. The patch series is the correct fix. > > > > > Got it. > > the clock patch is also queued for 5.4-rc1, right? > > I will apply this series and try to push it as early as possible > > during the merge window. > > The clock patch is as below in Linux-next tree, while I did NOT see it in > v5.3- > rc6, so it should be queued for 5.4-rc1, right? > Thanks for taking the patch series! Sorry for pushing, so you will apply this patch series to avoid the i.MX8MQ kernel boot up hang caused by insmod qoriq thermal driver, right? Then we no need to revert that TMU clock patch 951c1aef9691 ("clk: imx8mq: Remove CLK_IS_CRITICAL flag for IMX8MQ_CLK_TMU_ROOT"). Thanks, Anson > > > commit 951c1aef9691491ddf4dd5aab76f2665d56bd5d3 > Author: Anson Huang > Date: Fri Jul 5 12:56:11 2019 +0800 > > clk: imx8mq: Remove CLK_IS_CRITICAL flag for IMX8MQ_CLK_TMU_ROOT > > IMX8MQ_CLK_TMU_ROOT is ONLY used for thermal module, the driver > should manage this clock, so no need to have CLK_IS_CRITICAL flag > set. > > Signed-off-by: Anson Huang > Reviewed-by: Abel Vesa > Acked-by: Stephen Boyd > Signed-off-by: Shawn Guo > > drivers/clk/imx/clk-imx8mq.c > > > Thanks, > Anson
Re: [PATCH v2 2/2] reset: Reset controller driver for Intel LGM SoC
On 8/29/2019 4:01 AM, Martin Blumenstingl wrote: Hi, On Wed, Aug 28, 2019 at 3:53 AM Chuan Hua, Lei wrote: [...] 1. reset-lantiq.c use index instead of register offset + bit position. index reset is good for a small system (< 64). However, it will become very difficult to use if you have > 100 reset. So we use register offset + bit position reset-lantiq uses bit bit positions for specifying the reset line. for example this is from OpenWrt's vr9.dtsi: reset0: reset-controller@10 { ... reg = <0x10 4>, <0x14 4>; #reset-cells = <2>; }; gphy0: gphy@20 { ... resets = <&reset0 31 30>, <&reset1 7 7>; reset-names = "gphy", "gphy2"; }; in my own words this means: - all reset0 reset bits are at offset 0x10 (parent is RCU) - all reset0 status bits are at offset 0x14 (parent is RCU) - the first reset line uses reset bit 31 and status bit 30 - the second reset line uses reset bit 7 and status bit 7 - there can be multiple reset-controller instances, each taking the reset and status offsets (OpenWrt's vr9.dtsi specifies the second RCU reset controller "reset1" with reset offset 0x48 and status offset 0x24) in reset-lantiq.c, we split each reset request /status pair into one reset controller. Each reset controller handles up to 32 resets. It will create up to 9 even more reset controllers in the new SoCs. In reality, there is only one RCU controller for all SoCs. These designs worked but did not follow what hardware implemented. After checking the existing code and referring to other implementation, we decided to use register offset + bit position method. It can support all SoCs with this methods without code change(device tree change only). maybe I have a different interpretation of what "RCU" does. let me explain it in my own words based on my knowledge about VRX200: - in my own words it is a multi function device with the following functionality: - it contains two reset controllers (reset at 0x10, status 0x14 and reset at 0x48, status at 0x24) - it contains two USB2 PHYs (PHY registers at 0x18, ANA cfg at 0x38 and PHY registers at 0x34, ANA cfg at 0x3c) - it contains the configuration for the two GPHY IP blocks (at 0x20 and 0x68) - it contains endianness configuration registers (for PCI, PCIe, ...) - it contains the watchdog boot status (whether the SoC was previously reset by the WDT) - maybe more, but I don't know anything else about it In fact, there is only one reset controller for all SoCs even it doesn't prevent software from virtualizing multiple reset controllers. Reset control does include some misc stuff which has been moved to chiptop in new SoCs so that RCU has a clean job. just to confirm that I understand this correctly: even the VRX200 SoC only has one physical reset controller? instead of a contiguous register area (let's say: 0x10 to 0x1c) it uses four separate registers: - 0x10 for asserting/deasserting/pulsing the first 32 reset lines - 0x14 for the status of the first 32 reset lines - 0x48 for asserting/deasserting/pulsing the second 32 reset lines - 0x28 for the status of the second 32 reset lines Yes, but for VRX200, reset controller registers include some other misc registers. At that time, hardware doesn't use chiptop concept, they put some misc registers into CGU/RCU which makes it quite messy. We also prefer to have 0x10~0x1c. However, when developing VRX200, 0x18, 0x20 and other address had been used by other registers. system becomes more complex, need more reset bits for new modules, then hardware just added them to any available place. From another angle, hardware people also tried to keep backward compatible with old products like Danube. I'm not surprised that we got some of the IP block layout for the VRX200 RCU "wrong" - all "documentation" we have is the old Lantiq UGW (BSP). with proper documentation (as in a "public datasheet for the SoC") it would be easy to spot these mistakes (at least I assume that the quality of the Infineon / Lantiq datasheets is excellent). back to reset-intel-syscon: assigning only one job to the RCU hardware is a good idea (in my opinion). that brings up a question: why do we need the "syscon" compatible for the RCU node? this is typically used when registers are accessed by another IP block and the other driver has to access these registers as well. does this mean that there's more hidden in the RCU registers? As I mentioned, some other misc registers are put into RCU even they don't belong to reset functions. In MIPS, global software reset handled in arch/mips/, only recently, this situation changed. This means we have at least two places to access this module. 2. reset-lantiq.c does not support device restart which is part of the reset in old lantiq SoC. It moved this part into arch/mips/lantiq directory. it was moved to the .dts instead of the arch code. again from OpenWrt's vr9.dtsi [0]: reboot { compatible = "syscon-reboot"; regmap
Re: [PATCH v2 3/3] dwc: PCI: intel: Intel PCIe RC controller driver
On 8/29/2019 3:36 AM, Martin Blumenstingl wrote: On Wed, Aug 28, 2019 at 5:35 AM Chuan Hua, Lei wrote: [...] +static int intel_pcie_ep_rst_init(struct intel_pcie_port *lpp) +{ +struct device *dev = lpp->pci->dev; +int ret = 0; + +lpp->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW); +if (IS_ERR(lpp->reset_gpio)) { +ret = PTR_ERR(lpp->reset_gpio); +if (ret != -EPROBE_DEFER) +dev_err(dev, "failed to request PCIe GPIO: %d\n", ret); +return ret; +} +/* Make initial reset last for 100ms */ +msleep(100); why is there lpp->rst_interval when you hardcode 100ms here? There are different purpose. rst_interval is purely for asserted reset pulse. Here 100ms is to make sure the initial state keeps at least 100ms, then we can reset. my interpretation is that it totally depends on the board design or the bootloader setup. Partially, you are right. However, we should not add some dependency here from bootloader and board. rst_interval is just to make sure the pulse (low active or high active) lasts the specified the time. +Cc Kishon he recently added support for a GPIO reset line to the pcie-cadence-host.c [0] and I believe he's also maintaining pci-keystone.c which are both using a 100uS delay (instead of 100ms). I don't know the PCIe spec so maybe Kishon can comment on the values that should be used according to the spec. if there's then a reason why values other than the ones from the spec are needed then there should be a comment explaining why different values are needed (what problem does it solve). spec doesn't guide this part. It is a board or SoC specific setting. 100us also should work. spec only requirs reset duration should last 100ms. The idea is that before reset assert and deassert, make sure the default deassert status keeps some time. We take this value from hardware suggestion long time back. We can reduce this value to 100us, but we need to test on the board. OK. I don't know how other PCI controller drivers manage this. if the PCI maintainers are happy with this then I am as well maybe it's worth changing the comment to indicate that this delay was suggested by the hardware team (so it's clear that this is not coming from the PCI spec) Dilip will change to 100us delay and run the test. I also need to run some tests for old boards(XRX350/550/PRX300) to confirm this has no impact on function. [...] +static void __intel_pcie_remove(struct intel_pcie_port *lpp) +{ +pcie_rc_cfg_wr_mask(lpp, PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER, +0, PCI_COMMAND); I expect logic like this to be part of the PCI subsystem in Linux. why is this needed? [...] bind/unbind case we use this. For extreme cases, we use unbind and bind to reset PCI instead of rebooting. OK, but this does not seem Intel/Lantiq specific at all why isn't this managed by either pcie-designware-host.c or the generic PCI/PCIe subsystem in Linux? I doubt if other RC driver will support bind/unbind. We do have this requirement due to power management from WiFi devices. pcie-designware-host.c will gain .remove() support in Linux 5.4 I don't understand how .remove() and then .probe() again is different from .unbind() followed by a .bind() Good. If this is the case, bind/unbind eventually goes to probe/remove, so we can remove this. OK. as far as I understand you need to call dw_pcie_host_deinit from the .remove() callback (which is missing in this version) (I'm using drivers/pci/controller/dwc/pcie-tegra194.c as an example, this driver is in linux-next and thus will appear in Linux 5.4) Thanks for your information. We should adapt this in next version. Martin
Re: [PATCH V3 1/5] thermal: qoriq: Add clock operations
On Thu, 2019-08-29 at 02:49 +, Anson Huang wrote: > Hi, Rui > > > > On Wed, 2019-08-28 at 08:51 +, Anson Huang wrote: > > > > Hi, Rui > > > > > > > > > On Tue, 2019-08-27 at 12:41 +, Leonard Crestez wrote: > > > > > > On 27.08.2019 04:51, Anson Huang wrote: > > > > > > > > In an earlier series the CLK_IS_CRITICAL flags was > > > > > > > > removed > > > > > > > > from the TMU clock so if the thermal driver doesn't > > > > > > > > explicitly enable it the system will hang on probe. > > > > > > > > This is > > > > > > > > what happens in linux-next right now! > > > > > > > > > > > > > > The thermal driver should be built with module, so > > > > > > > default > > > > > > > kernel should can boot up, do you modify the thermal > > > > > > > driver as > > > > > > > built- in? > > > > > > > > > > > > > > > Unless this patches is merged soon we'll end up with a > > > > > > > > 5.4- > > > > > > > > rc1 > > > > > > > > that doesn't boot on imx8mq. An easy fix would be to > > > > > > > > drop/revert commit > > > > > > > > 951c1aef9691 ("clk: imx8mq: Remove CLK_IS_CRITICAL flag > > > > > > > > for > > > > > > > > IMX8MQ_CLK_TMU_ROOT") until the thermal patches are > > > > accepted. > > > > > > > > > > > > > > If the thermal driver is built as module, I think no need > > > > > > > to > > > > > > > revert the commit, but if by default thermal driver is > > > > > > > built-in or mod probed, then yes, it should NOT break > > > > > > > kernel boot > > > > up. > > > > > > > > > > > > The qoriq_thermal driver is built as a module in defconfig > > > > > > and > > > > > > when modules are properly installed in rootfs they will be > > > > > > automatically be probed on boot and cause a hang. > > > > > > > > > > > > I usually run nfsroot with modules: > > > > > > > > > > > > make modules_install INSTALL_MOD_PATH=/srv/nfs/imx8- > > > > > > root > > > > > > > > > > so we need this patch shipped in the beginning of the merge > > > > > window, right? > > > > > if there is hard dependency between patches, it's better to > > > > > send > > > > > them in one series, and get shipped via either tree. > > > > > > > > There is no hard dependency in this patch series. Previous for > > > > the > > > > TMU clock disabled patch, since thermal driver is built as > > > > module so > > > > I did NOT found the issue. The patch series is the correct fix. > > > > > > > > > > Got it. > > > the clock patch is also queued for 5.4-rc1, right? > > > I will apply this series and try to push it as early as possible > > > during the merge window. > > > > The clock patch is as below in Linux-next tree, while I did NOT see > > it in v5.3- > > rc6, so it should be queued for 5.4-rc1, right? > > Thanks for taking the patch series! > > Sorry for pushing, so you will apply this patch series to avoid the > i.MX8MQ kernel boot up hang > caused by insmod qoriq thermal driver, right? Then we no need to > revert that TMU clock patch > 951c1aef9691 ("clk: imx8mq: Remove CLK_IS_CRITICAL flag for > IMX8MQ_CLK_TMU_ROOT"). > right. I will queue it for 5.4-rc1. thanks, rui > Thanks, > Anson > > > > > > > commit 951c1aef9691491ddf4dd5aab76f2665d56bd5d3 > > Author: Anson Huang > > Date: Fri Jul 5 12:56:11 2019 +0800 > > > > clk: imx8mq: Remove CLK_IS_CRITICAL flag for > > IMX8MQ_CLK_TMU_ROOT > > > > IMX8MQ_CLK_TMU_ROOT is ONLY used for thermal module, the driver > > should manage this clock, so no need to have CLK_IS_CRITICAL > > flag > > set. > > > > Signed-off-by: Anson Huang > > Reviewed-by: Abel Vesa > > Acked-by: Stephen Boyd > > Signed-off-by: Shawn Guo > > > > drivers/clk/imx/clk-imx8mq.c > > > > > > Thanks, > > Anson
Re: [RESEND PATCH V3 3/8] perf/x86/intel: Support hardware TopDown metrics
On Wed, Aug 28, 2019 at 06:28:57PM +0200, Peter Zijlstra wrote: > On Wed, Aug 28, 2019 at 09:17:54AM -0700, Andi Kleen wrote: > > > This really doesn't make sense to me; if you set FIXED_CTR3 := 0, you'll > > > never trigger the overflow there; this then seems to suggest the actual > > > > The 48bit counter might overflow in a few hours. > > Sure; the point is? Kan said it should not be too big; a full 48bit wrap > around must be too big or nothing is. We expect users to avoid making it too big, but we cannot rule it out. Actually for the common perf stat for a long time case you can make it fairly big because the percentages still work out over the complete run time. The problem with letting it accumulate too much is mainly if you want to use a continuously running metrics counter to time smaller regions by taking deltas, for example using RDPMC. In this case the small regions would be too inaccurate after some time. But to make the first case work absolutely need to handle the overflow case. Otherwise the metrics would just randomly stop at some point. -Andi
[PATCH 1/1] sched/rt: avoid contend with CFS task
At original linux design, RT & CFS scheduler are independent. Current RT task placement policy will select the first cpu in lowest_mask, even if the first CPU is running a CFS task. This may put RT task to a running cpu and let CFS task runnable. So we select idle cpu in lowest_mask first to avoid preempting CFS task. Signed-off-by: Jing-Ting Wu --- kernel/sched/rt.c | 42 +- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index a532558..626ca27 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1388,7 +1388,6 @@ static void yield_task_rt(struct rq *rq) static int select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) { - struct task_struct *curr; struct rq *rq; /* For anything but wake ups, just return the task_cpu */ @@ -1398,33 +1397,15 @@ static void yield_task_rt(struct rq *rq) rq = cpu_rq(cpu); rcu_read_lock(); - curr = READ_ONCE(rq->curr); /* unlocked access */ /* -* If the current task on @p's runqueue is an RT task, then -* try to see if we can wake this RT task up on another -* runqueue. Otherwise simply start this RT task -* on its current runqueue. -* -* We want to avoid overloading runqueues. If the woken -* task is a higher priority, then it will stay on this CPU -* and the lower prio task should be moved to another CPU. -* Even though this will probably make the lower prio task -* lose its cache, we do not want to bounce a higher task -* around just because it gave up its CPU, perhaps for a -* lock? -* -* For equal prio tasks, we just let the scheduler sort it out. -* -* Otherwise, just let it ride on the affined RQ and the -* post-schedule router will push the preempted task away -* -* This test is optimistic, if we get it wrong the load-balancer -* will have to sort it out. +* If the task p is allowed to put more than one CPU or +* it is not allowed to put on this CPU. +* Let p use find_lowest_rq to choose other idle CPU first, +* instead of choose this cpu and preempt curr cfs task. */ - if (curr && unlikely(rt_task(curr)) && - (curr->nr_cpus_allowed < 2 || -curr->prio <= p->prio)) { + if ((p->nr_cpus_allowed > 1) || + (!cpumask_test_cpu(cpu, p->cpus_ptr))) { int target = find_lowest_rq(p); /* @@ -1648,6 +1629,7 @@ static int find_lowest_rq(struct task_struct *task) struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask); int this_cpu = smp_processor_id(); int cpu = task_cpu(task); + int i; /* Make sure the mask is initialized first */ if (unlikely(!lowest_mask)) @@ -1659,6 +1641,16 @@ static int find_lowest_rq(struct task_struct *task) if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) return -1; /* No targets found */ + /* Choose previous cpu if it is idle and it fits lowest_mask */ + if (cpumask_test_cpu(cpu, lowest_mask) && idle_cpu(cpu)) + return cpu; + + /* Choose idle_cpu among lowest_mask */ + for_each_cpu(i, lowest_mask) { + if (idle_cpu(i)) + return i; + } + /* * At this point we have built a mask of CPUs representing the * lowest priority tasks in the system. Now we want to elect -- 1.7.9.5
[PATCH netdev] net: stmmac: dwmac-rk: Don't fail if phy regulator is absent
From: Chen-Yu Tsai The devicetree binding lists the phy phy as optional. As such, the driver should not bail out if it can't find a regulator. Instead it should just skip the remaining regulator related code and continue on normally. Skip the remainder of phy_power_on() if a regulator supply isn't available. This also gets rid of the bogus return code. Fixes: 2e12f536635f ("net: stmmac: dwmac-rk: Use standard devicetree property for phy regulator") Signed-off-by: Chen-Yu Tsai --- On a separate note, maybe we should add this file to the Rockchip entry in MAINTAINERS? --- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 4644b2aeeba1..e2e469c37a4d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1194,10 +1194,8 @@ static int phy_power_on(struct rk_priv_data *bsp_priv, bool enable) int ret; struct device *dev = &bsp_priv->pdev->dev; - if (!ldo) { - dev_err(dev, "no regulator found\n"); - return -1; - } + if (!ldo) + return 0; if (enable) { ret = regulator_enable(ldo); -- 2.20.1
Re: [PATCH v2] fs/proc/page: Skip uninitialized page when iterating page structures
On 2019/08/28 23:18, Waiman Long wrote: > On 8/28/19 10:09 AM, Michal Hocko wrote: >> On Wed 28-08-19 09:46:21, Waiman Long wrote: >>> On 8/28/19 4:00 AM, Michal Hocko wrote: On Tue 27-08-19 16:22:38, Michal Hocko wrote: > Dan, isn't this something we have discussed recently? This was http://lkml.kernel.org/r/20190725023100.31141-3-t-fukas...@vx.jp.nec.com and talked about /proc/kpageflags but this is essentially the same thing AFAIU. I hope we get a consistent solution for both issues. >>> Yes, it is the same problem. The uninitialized page structure problem >>> affects all the 3 /proc/kpage{cgroup,count,flags) files. >>> >>> Toshiki's patch seems to fix it just for /proc/kpageflags, though. >> Yup. I was arguing that whacking a mole kinda fix is far from good. Dan >> had some arguments on why initializing those struct pages is a problem. >> The discussion had a half open end though. I hoped that Dan would try >> out the initialization side but I migh have misunderstood. > > If the page structures of the reserved PFNs are always initialized, that > will fix the problem too. I am not familiar with the zone device code. > So I didn't attempt to do that. > > Cheers, > Longman > > I also think that the struct pages should be initialized. I'm preparing to post the patch. Toshiki Fukasawa
Re: [PATCH] usb: storage: Add ums-cros-aoa driver
(Thanks for the reviews... I'll get back to the kernel code details after double-checking if this can be done from userspace.) > > Besides, what's wrong with binding to devices that weren't switched > > into AOA mode? Would that just provoke a bunch of unnecessary error > > messages? It's not about devices that aren't switched into AOA mode... it's about devices that are switched into AOA mode for other reasons (connecting to other Android apps). I don't think a kernel driver like that exists today, but it could be added, or people could use libusb to talk to an AOA device. AOA is just a general mechanism to talk to an Android app for whatever you want, and the descriptors sent during mode switch clarify what app it's talking to (and thereby what protocol it is using... it could be mass storage or it could be something entirely different). But a device switched into AOA mode for whatever app will always use that same well-known VID/PID (18d1:2d00). So if I just add that VID/PID to the IDs bound by the usb-storage driver, it would also grab a device that was mode-switched by userspace to talk to a completely different app. I need some way to make sure it only grabs the intended device, and there's no good identifier for that other than comparing the dev path to what you originally mode switched. > > > + /* > > > + * Only interface 0 connects to the AOA app. Android devices that > > > have > > > + * ADB enabled also export an interface 1. We don't want it. > > > + */ > > > + if (us->pusb_intf->cur_altsetting->desc.bInterfaceNumber != 0) > > > + return -ENODEV; > > > > Do you really need this test? What would go wrong if you don't do it? Yes, otherwise two separate usb-storage instances bind to the two interfaces. The second interface is meant for a special ADB debugging protocol and will not respond at all to USB mass storage packets, so eventually the first request to it times out and usb_stor_invoke_transport() will do a port reset to recover. That also kills the first interface asynchronously even though it was working fine. > > IMO the userspace approach would be better, unless you can provide a > > really compelling argument for why it won't suffice. Well... okay, let me think through that again. I just found the new_id sysfs API that I wasn't aware of before, maybe I could leverage that to bind this from userspace after doing the mode switch. But it looks like that only operates on whole devices... is there any way to force it to only bind one particular interface?
Re: [RFC PATCH v2 00/19] RDMA/FS DAX truncate proposal V1,000,002 ;-)
On 8/28/19 7:02 PM, Ira Weiny wrote: On Mon, Aug 26, 2019 at 03:55:10PM +1000, Dave Chinner wrote: On Fri, Aug 23, 2019 at 10:08:36PM -0700, Ira Weiny wrote: On Sat, Aug 24, 2019 at 10:11:24AM +1000, Dave Chinner wrote: On Fri, Aug 23, 2019 at 09:04:29AM -0300, Jason Gunthorpe wrote: ... Sure, that part works because the struct file is passed. It doesn't end up with the same fd number in the other process, though. The issue is that layout leases need to notify userspace when they are broken by the kernel, so a lease stores the owner pid/tid in the file->f_owner field via __f_setown(). It also keeps a struct fasync attached to the file_lock that records the fd that the lease was created on. When a signal needs to be sent to userspace for that lease, we call kill_fasync() and that walks the list of fasync structures on the lease and calls: send_sigio(fown, fa->fa_fd, band); And it does for every fasync struct attached to a lease. Yes, a lease can track multiple fds, but it can only track them in a single process context. The moment the struct file is shared with another process, the lease is no longer capable of sending notifications to all the lease holders. Yes, you can change the owning process via F_SETOWNER, but that's still only a single process context, and you can't change the fd in the fasync list. You can add new fd to an existing lease by calling F_SETLEASE on the new fd, but you still only have a single process owner context for signal delivery. As such, leases that require callbacks to userspace are currently only valid within the process context the lease was taken in. But for long term pins we are not requiring callbacks. Hi Ira, If "require callbacks to userspace" means sending SIGIO, then actually FOLL_LONGTERM *does* require those callbacks. Because we've been, so far, equating FOLL_LONGTERM with the vaddr_pin struct and with a lease. What am I missing here? thanks, -- John Hubbard NVIDIA
Re: [PATCH 00/10] OOM Debug print selection and additional information
On Wed, Aug 28, 2019 at 1:04 PM Edward Chron wrote: > > On Wed, Aug 28, 2019 at 3:12 AM Tetsuo Handa > wrote: > > > > On 2019/08/28 16:08, Michal Hocko wrote: > > > On Tue 27-08-19 19:47:22, Edward Chron wrote: > > >> For production systems installing and updating EBPF scripts may someday > > >> be very common, but I wonder how data center managers feel about it now? > > >> Developers are very excited about it and it is a very powerful tool but > > >> can I > > >> get permission to add or replace an existing EBPF on production systems? > > > > > > I am not sure I understand. There must be somebody trusted to take care > > > of systems, right? > > > > > > > Speak of my cases, those who take care of their systems are not developers. > > And they afraid changing code that runs in kernel mode. They unlikely give > > permission to install SystemTap/eBPF scripts. As a result, in many cases, > > the root cause cannot be identified. > > +1. Exactly. The only thing we could think of Tetsuo is if Linux OOM Reporting > uses a an eBPF script then systems have to load them to get any kind of > meaningful report. Frankly, if using eBPF is the route to go than essentially > the whole OOM reporting should go there. We can adjust as we need and > have precedent for wanting to load the script. That's the best we could come > up with. > > > > > Moreover, we are talking about OOM situations, where we can't expect > > userspace > > processes to work properly. We need to dump information we want, without > > counting on userspace processes, before sending SIGKILL. > > +1. We've tried and as you point out and for best results the kernel > has to provide > the state. > > Again a full system dump would be wonderful, but taking a full dump for > every OOM event on production systems? I am not nearly a good enough salesman > to sell that one. So we need an alternate mechanism. > > If we can't agree on some sort of extensible, configurable approach then put > the standard OOM Report in eBPF and make it mandatory to load it so we can > justify having to do that. Linux should load it automatically. > We'll just make a few changes and additions as needed. > > Sounds like a plan that we could live with. > Would be interested if this works for others as well. One further comment. In talking with my colleagues here who know eBPF much better than I do, it may not be possible to implement something this complicated with eBPF. If that is in the fact the case, then we'd have to try and hook the OOM Reporting code with tracepoints similar to kprobes only we want to do more than add counters we want to change the flow to skip small output entries that aren't worth printing. If this isn't feasible with eBPF, then some derivative or our approach or enhancing the OOM output code directly seem like the best options. Will have to investigate this further.
Re: [PATCH] usb: chipidea: msm: Use device-managed registration API
On 19-08-28 19:42:32, Chuhong Yuan wrote: > On Wed, Aug 28, 2019 at 11:24 AM Peter Chen wrote: > > > > On 19-07-23 11:02:07, Chuhong Yuan wrote: > > > Use devm_reset_controller_register to get rid > > > of manual unregistration. > > > > > > Signed-off-by: Chuhong Yuan > > > --- > > > drivers/usb/chipidea/ci_hdrc_msm.c | 4 +--- > > > 1 file changed, 1 insertion(+), 3 deletions(-) > > > > > > diff --git a/drivers/usb/chipidea/ci_hdrc_msm.c > > > b/drivers/usb/chipidea/ci_hdrc_msm.c > > > index bb4645a8ca46..067542e84aea 100644 > > > --- a/drivers/usb/chipidea/ci_hdrc_msm.c > > > +++ b/drivers/usb/chipidea/ci_hdrc_msm.c > > > @@ -216,7 +216,7 @@ static int ci_hdrc_msm_probe(struct platform_device > > > *pdev) > > > ci->rcdev.ops = &ci_hdrc_msm_reset_ops; > > > ci->rcdev.of_node = pdev->dev.of_node; > > > ci->rcdev.nr_resets = 2; > > > - ret = reset_controller_register(&ci->rcdev); > > > + ret = devm_reset_controller_register(&pdev->dev, &ci->rcdev); > > > if (ret) > > > return ret; > > > > > > @@ -272,7 +272,6 @@ static int ci_hdrc_msm_probe(struct platform_device > > > *pdev) > > > err_iface: > > > clk_disable_unprepare(ci->core_clk); > > > err_fs: > > > - reset_controller_unregister(&ci->rcdev); > > > > It is devm API, why the unregister needs to be called at > > fail path? > > > > I am not very clear about your problem... > After using devm_reset_controller_register(), I have removed > reset_controller_unregister() calls > in this patch. > Sorry, my fault. Your patch is ok, but try to clean up the label "err_fs" since it is not needed. Peter > > Peter > > > > > return ret; > > > } > > > > > > @@ -284,7 +283,6 @@ static int ci_hdrc_msm_remove(struct platform_device > > > *pdev) > > > ci_hdrc_remove_device(ci->ci); > > > clk_disable_unprepare(ci->iface_clk); > > > clk_disable_unprepare(ci->core_clk); > > > - reset_controller_unregister(&ci->rcdev); > > > > > > return 0; > > > } > > > -- > > > 2.20.1 > > >
Re: [PATCH] ima: use struct_size() in kzalloc()
Hi Gustavo, On Wed, 2019-08-28 at 13:29 -0500, Gustavo A. R. Silva wrote: > On 5/29/19 11:53 AM, Gustavo A. R. Silva wrote: > > One of the more common cases of allocation size calculations is finding > > the size of a structure that has a zero-sized array at the end, along > > with memory for some number of elements for that array. For example: > > > > struct foo { > >int stuff; > >struct boo entry[]; > > }; > > > > instance = kzalloc(sizeof(struct foo) + count * sizeof(struct boo), > > GFP_KERNEL); > > > > Instead of leaving these open-coded and prone to type mistakes, we can > > now use the new struct_size() helper: > > > > instance = kzalloc(struct_size(instance, entry, count), GFP_KERNEL); > > > > This code was detected with the help of Coccinelle. > > > > Signed-off-by: Gustavo A. R. Silva > > --- > > security/integrity/ima/ima_template.c | 5 ++--- > > 1 file changed, 2 insertions(+), 3 deletions(-) > > > > diff --git a/security/integrity/ima/ima_template.c > > b/security/integrity/ima/ima_template.c > > index b631b8bc7624..b945dff2ed14 100644 > > --- a/security/integrity/ima/ima_template.c > > +++ b/security/integrity/ima/ima_template.c > > @@ -281,9 +281,8 @@ static int ima_restore_template_data(struct > > ima_template_desc *template_desc, > > int ret = 0; > > int i; > > > > - *entry = kzalloc(sizeof(**entry) + > > - template_desc->num_fields * sizeof(struct ima_field_data), > > - GFP_NOFS); > > + *entry = kzalloc(struct_size(*entry, template_data, > > +template_desc->num_fields), GFP_NOFS); > > if (!*entry) > > return -ENOMEM; > > > > The same usage exists in ima_api.c: ima_alloc_init_template(). Did you want to make the change there as well? thanks, Mimi
linux-next: build warning after merge of the block tree
Hi all, After merging the block tree, today's linux-next build (x86_64 allmodconfig) produced this warning: In file included from include/trace/events/iocost.h:8, from : include/trace/events/iocost.h:12:57: warning: 'struct ioc_now' declared inside parameter list will not be visible outside of this definition or declaration TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, ^~~ include/linux/tracepoint.h:233:34: note: in definition of macro '__DECLARE_TRACE' static inline void trace_##name(proto)\ ^ include/linux/tracepoint.h:396:24: note: in expansion of macro 'PARAMS' __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ ^~ include/linux/tracepoint.h:532:2: note: in expansion of macro 'DECLARE_TRACE' DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) ^ include/linux/tracepoint.h:532:22: note: in expansion of macro 'PARAMS' DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) ^~ include/trace/events/iocost.h:10:1: note: in expansion of macro 'TRACE_EVENT' TRACE_EVENT(iocost_iocg_activate, ^~~ include/trace/events/iocost.h:12:2: note: in expansion of macro 'TP_PROTO' TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, ^~~~ include/trace/events/iocost.h:12:18: warning: 'struct ioc_gq' declared inside parameter list will not be visible outside of this definition or declaration TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, ^~ include/linux/tracepoint.h:233:34: note: in definition of macro '__DECLARE_TRACE' static inline void trace_##name(proto)\ ^ include/linux/tracepoint.h:396:24: note: in expansion of macro 'PARAMS' __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ ^~ include/linux/tracepoint.h:532:2: note: in expansion of macro 'DECLARE_TRACE' DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) ^ include/linux/tracepoint.h:532:22: note: in expansion of macro 'PARAMS' DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) ^~ include/trace/events/iocost.h:10:1: note: in expansion of macro 'TRACE_EVENT' TRACE_EVENT(iocost_iocg_activate, ^~~ include/trace/events/iocost.h:12:2: note: in expansion of macro 'TP_PROTO' TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now, ^~~~ (and many more) Introduced by commit 7caa47151ab2 ("blkcg: implement blk-iocost") To get these warnings you need to build with CONFIG_HEADER_TEST and CONFIG_KERNEL_HEADER_TEST (and maybe CONFIG_UAPI_HEADER_TEST). allmodconfig does that. -- Cheers, Stephen Rothwell pgp76eVUKGASg.pgp Description: OpenPGP digital signature
[PATCH] mm: Remove NULL check in clear_hwpoisoned_pages()
There is no possibility for memmap to be NULL in the current codebase. This check was added in commit 95a4774d055c ("memory-hotplug: update mce_bad_pages when removing the memory") where memmap was originally inited to NULL, and only conditionally given a value. The code that could have passed a NULL has been removed, so there is no longer a possibility that memmap can be NULL. Signed-off-by: Alastair D'Silva --- mm/sparse.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/mm/sparse.c b/mm/sparse.c index 78979c142b7d..9f7e3682cdcb 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -754,9 +754,6 @@ static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) { int i; - if (!memmap) - return; - /* * A further optimization is to have per section refcounted * num_poisoned_pages. But that would need more space per memmap, so -- 2.21.0
Re: [PATCH v2 15/15] drivers: thermal: tsens: Add interrupt support
Hi Amit, On 08/27/2019 08:14 AM, Amit Kucheria wrote: > Depending on the IP version, TSENS supports upper, lower, max, min and > critical threshold interrupts. We only add support for upper and lower > threshold interrupts for now. > > TSENSv2 has an irq [status|clear|mask] bit tuple for each sensor while > earlier versions only have a single bit per sensor to denote status and > clear. At each interrupt, we reprogram the new upper and lower threshold > in the .set_trip callback. > > Signed-off-by: Amit Kucheria > --- > drivers/thermal/qcom/tsens-common.c | 377 ++-- > drivers/thermal/qcom/tsens-v0_1.c | 11 + > drivers/thermal/qcom/tsens-v1.c | 29 +++ > drivers/thermal/qcom/tsens-v2.c | 13 + > drivers/thermal/qcom/tsens.c| 32 ++- > drivers/thermal/qcom/tsens.h| 270 > 6 files changed, 669 insertions(+), 63 deletions(-) > > diff --git a/drivers/thermal/qcom/tsens-common.c > b/drivers/thermal/qcom/tsens-common.c > index 06b44cfd5eab9..c549f8e1488ba 100644 > --- a/drivers/thermal/qcom/tsens-common.c > +++ b/drivers/thermal/qcom/tsens-common.c > @@ -13,6 +13,30 @@ > #include > #include "tsens.h" > > +/** struct tsens_irq_data - IRQ status and temperature violations > + * @up_viol:upper threshold violated > + * @up_thresh: upper threshold temperature value > + * @up_irq_mask:mask register for upper threshold irqs > + * @up_irq_clear: clear register for uppper threshold irqs > + * @low_viol: lower threshold violated > + * @low_thresh: lower threshold temperature value > + * @low_irq_mask: mask register for lower threshold irqs > + * @low_irq_clear: clear register for lower threshold irqs > + * > + * Structure containing data about temperature threshold settings and > + * irq status if they were violated. > + */ > +struct tsens_irq_data { > + u32 up_viol; > + int up_thresh; > + u32 up_irq_mask; > + u32 up_irq_clear; > + u32 low_viol; > + int low_thresh; > + u32 low_irq_mask; > + u32 low_irq_clear; > +}; > + > char *qfprom_read(struct device *dev, const char *cname) > { > struct nvmem_cell *cell; > @@ -65,6 +89,14 @@ void compute_intercept_slope(struct tsens_priv *priv, u32 > *p1, > } > } > > +static inline u32 degc_to_code(int degc, const struct tsens_sensor *sensor) > +{ > + u64 code = (degc * sensor->slope + sensor->offset) / SLOPE_FACTOR; > + > + pr_debug("%s: raw_code: 0x%llx, degc:%d\n", __func__, code, degc); > + return clamp_val(code, THRESHOLD_MIN_ADC_CODE, THRESHOLD_MAX_ADC_CODE); > +} > + > static inline int code_to_degc(u32 adc_code, const struct tsens_sensor *s) > { > int degc, num, den; > @@ -114,6 +146,314 @@ static int tsens_hw_to_mC(struct tsens_sensor *s, int > field) > return sign_extend32(temp, priv->tempres) * 100; > } > > +/** > + * tsens_mC_to_hw - Return correct value to be written to threshold > + * registers, whether in ADC code or deciCelsius depending on IP version > + */ > +static int tsens_mC_to_hw(struct tsens_sensor *s, int temp) > +{ > + struct tsens_priv *priv = s->priv; > + > + if (priv->feat->adc) { > + /* milliC to C to adc code */ > + return degc_to_code(temp / 1000, s); > + } > + > + /* milliC to deciC */ > + return temp / 100; > +} > + > +static inline unsigned int tsens_ver(struct tsens_priv *priv) > +{ > + return priv->feat->ver_major; > +} > + > +/** > + * tsens_set_interrupt_v1 - Disable an interrupt (enable = false) > + * Re-enable an interrupt (enable = true) > + */ > +static void tsens_set_interrupt_v1(struct tsens_priv *priv, u32 hw_id, > +enum tsens_irq_type irq_type, bool enable) > +{ > + u32 index; > + > + if (enable) { > + switch (irq_type) { > + case UPPER: > + index = UP_INT_CLEAR_0 + hw_id; > + break; > + case LOWER: > + index = LOW_INT_CLEAR_0 + hw_id; > + break; > + } > + regmap_field_write(priv->rf[index], 0); > + } else { > + switch (irq_type) { > + case UPPER: > + index = UP_INT_CLEAR_0 + hw_id; > + break; > + case LOWER: > + index = LOW_INT_CLEAR_0 + hw_id; > + break; > + } > + regmap_field_write(priv->rf[index], 1); > + } > +} > + > +/** > + * tsens_set_interrupt_v2 - Disable an interrupt (enable = false) > + * Re-enable an interrupt (enable = true) > + */ > +static void tsens_set_interrupt_v2(struct tsens_priv *priv, u32 hw_id, > +enum tsens_irq_type irq_type, bool enable) > +{ > + u32 index_mask, index_clear; > + > + if (enable) { > + switch (irq_type) { > + case UPPER: >
linux-next: build failure after merge of the regulator tree
Hi all, After merging the regulator tree, today's linux-next build (x86_64 allmodconfig) failed like this: drivers/regulator/mt6358-regulator.c:5:10: fatal error: linux/mfd/mt6358/registers.h: No such file or directory #include ^~ Caused by commit f67ff1bd58f0 ("regulator: mt6358: Add support for MT6358 regulator") I have reverted that commit for today. -- Cheers, Stephen Rothwell pgp9FNuSWoypW.pgp Description: OpenPGP digital signature
BUG: corrupted list in p9_fd_cancelled (2)
Hello, syzbot found the following crash on: HEAD commit:36146921 Merge tag 'hyperv-fixes-signed' of git://git.kern.. git tree: upstream console output: https://syzkaller.appspot.com/x/log.txt?x=169f691e60 kernel config: https://syzkaller.appspot.com/x/.config?x=6919752cc1b760b4 dashboard link: https://syzkaller.appspot.com/bug?extid=1d26c4ed77bc6c5ed5e6 compiler: gcc (GCC) 9.0.0 20181231 (experimental) syz repro: https://syzkaller.appspot.com/x/repro.syz?x=14d03ba660 IMPORTANT: if you fix the bug, please add the following tag to the commit: Reported-by: syzbot+1d26c4ed77bc6c5ed...@syzkaller.appspotmail.com list_del corruption, 88808ecdbfb0->next is LIST_POISON1 (dead0100) [ cut here ] kernel BUG at lib/list_debug.c:45! invalid opcode: [#1] PREEMPT SMP KASAN CPU: 0 PID: 20174 Comm: syz-executor.1 Not tainted 5.3.0-rc5+ #125 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__list_del_entry_valid.cold+0x23/0x4f lib/list_debug.c:45 Code: e8 d5 06 1e fe 0f 0b 4c 89 f6 48 c7 c7 e0 26 c6 87 e8 c4 06 1e fe 0f 0b 4c 89 ea 4c 89 f6 48 c7 c7 20 26 c6 87 e8 b0 06 1e fe <0f> 0b 4c 89 e2 4c 89 f6 48 c7 c7 80 26 c6 87 e8 9c 06 1e fe 0f 0b RSP: 0018:8880994076d8 EFLAGS: 00010286 RAX: 004e RBX: 111013280ee9 RCX: RDX: RSI: 815c2526 RDI: ed1013280ecd RBP: 8880994076f0 R08: 004e R09: ed1015d060d1 R10: ed1015d060d0 R11: 8880ae830687 R12: dead0122 R13: dead0100 R14: 88808ecdbfb0 R15: 88808ecdbfb8 FS: 7fb2aca54700() GS:8880ae80() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 7ffee6574f58 CR3: a8e6d000 CR4: 001406f0 DR0: DR1: DR2: DR3: DR6: fffe0ff0 DR7: 0400 Call Trace: __list_del_entry include/linux/list.h:131 [inline] list_del include/linux/list.h:139 [inline] p9_fd_cancelled+0x3c/0x1c0 net/9p/trans_fd.c:710 p9_client_flush+0x1b7/0x1f0 net/9p/client.c:674 p9_client_rpc+0x112f/0x12a0 net/9p/client.c:781 p9_client_version net/9p/client.c:952 [inline] p9_client_create+0xb7f/0x1430 net/9p/client.c:1052 v9fs_session_init+0x1e7/0x18c0 fs/9p/v9fs.c:406 v9fs_mount+0x7d/0x920 fs/9p/vfs_super.c:120 legacy_get_tree+0x108/0x220 fs/fs_context.c:661 vfs_get_tree+0x8e/0x390 fs/super.c:1413 do_new_mount fs/namespace.c:2791 [inline] do_mount+0x13b3/0x1c30 fs/namespace.c:3111 ksys_mount+0xdb/0x150 fs/namespace.c:3320 __do_sys_mount fs/namespace.c:3334 [inline] __se_sys_mount fs/namespace.c:3331 [inline] __x64_sys_mount+0xbe/0x150 fs/namespace.c:3331 do_syscall_64+0xfd/0x6a0 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x459879 Code: fd b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:7fb2aca53c78 EFLAGS: 0246 ORIG_RAX: 00a5 RAX: ffda RBX: 0005 RCX: 00459879 RDX: 22c0 RSI: 2040 RDI: RBP: 0075bfc8 R08: 2400 R09: R10: R11: 0246 R12: 7fb2aca546d4 R13: 004c5e2f R14: 004da930 R15: Modules linked in: ---[ end trace c76f5f29f0af3347 ]--- RIP: 0010:__list_del_entry_valid.cold+0x23/0x4f lib/list_debug.c:45 Code: e8 d5 06 1e fe 0f 0b 4c 89 f6 48 c7 c7 e0 26 c6 87 e8 c4 06 1e fe 0f 0b 4c 89 ea 4c 89 f6 48 c7 c7 20 26 c6 87 e8 b0 06 1e fe <0f> 0b 4c 89 e2 4c 89 f6 48 c7 c7 80 26 c6 87 e8 9c 06 1e fe 0f 0b RSP: 0018:8880994076d8 EFLAGS: 00010286 RAX: 004e RBX: 111013280ee9 RCX: RDX: RSI: 815c2526 RDI: ed1013280ecd RBP: 8880994076f0 R08: 004e R09: ed1015d060d1 R10: ed1015d060d0 R11: 8880ae830687 R12: dead0122 R13: dead0100 R14: 88808ecdbfb0 R15: 88808ecdbfb8 FS: 7fb2aca54700() GS:8880ae80() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 7ffee6574f58 CR3: a8e6d000 CR4: 001406f0 DR0: DR1: DR2: DR3: DR6: fffe0ff0 DR7: 0400 --- This bug is generated by a bot. It may contain errors. See https://goo.gl/tpsmEJ for more information about syzbot. syzbot engineers can be reached at syzkal...@googlegroups.com. syzbot will keep track of this bug report. See: https://goo.gl/tpsmEJ#status for how to communicate with syzbot. syzbot can test patches for this bug, for details see: https://goo.gl/tpsmEJ#testing-patches