[PATCH v1 2/2] drm/bridge: anx7625: Add anx7625 MIPI to DP bridge driver
The ANX7625 is an ultra-low power 4K Mobile HD Transmitter designed for portable device. It converts MIPI to DisplayPort 1.3 4K. Signed-off-by: Xin Ji --- drivers/gpu/drm/bridge/Makefile |2 +- drivers/gpu/drm/bridge/analogix/Kconfig |6 + drivers/gpu/drm/bridge/analogix/Makefile |1 + drivers/gpu/drm/bridge/analogix/anx7625.c | 2086 + drivers/gpu/drm/bridge/analogix/anx7625.h | 397 ++ 5 files changed, 2491 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/bridge/analogix/anx7625.c create mode 100644 drivers/gpu/drm/bridge/analogix/anx7625.h diff --git a/drivers/gpu/drm/bridge/Makefile b/drivers/gpu/drm/bridge/Makefile index 4934fcf..bcd388a 100644 --- a/drivers/gpu/drm/bridge/Makefile +++ b/drivers/gpu/drm/bridge/Makefile @@ -12,8 +12,8 @@ obj-$(CONFIG_DRM_SII9234) += sii9234.o obj-$(CONFIG_DRM_THINE_THC63LVD1024) += thc63lvd1024.o obj-$(CONFIG_DRM_TOSHIBA_TC358764) += tc358764.o obj-$(CONFIG_DRM_TOSHIBA_TC358767) += tc358767.o -obj-$(CONFIG_DRM_ANALOGIX_DP) += analogix/ obj-$(CONFIG_DRM_I2C_ADV7511) += adv7511/ obj-$(CONFIG_DRM_TI_SN65DSI86) += ti-sn65dsi86.o obj-$(CONFIG_DRM_TI_TFP410) += ti-tfp410.o +obj-y += analogix/ obj-y += synopsys/ diff --git a/drivers/gpu/drm/bridge/analogix/Kconfig b/drivers/gpu/drm/bridge/analogix/Kconfig index e930ff9..b2f127e 100644 --- a/drivers/gpu/drm/bridge/analogix/Kconfig +++ b/drivers/gpu/drm/bridge/analogix/Kconfig @@ -2,3 +2,9 @@ config DRM_ANALOGIX_DP tristate depends on DRM + +config ANALOGIX_ANX7625 + tristate "Analogix MIPI to DP interface support" + help + ANX7625 is an ultra-low power 4K mobile HD transmitter designed + for portable devices. It converts MIPI/DPI to DisplayPort1.3 4K. 
diff --git a/drivers/gpu/drm/bridge/analogix/Makefile b/drivers/gpu/drm/bridge/analogix/Makefile index fdbf3fd..8a52867 100644 --- a/drivers/gpu/drm/bridge/analogix/Makefile +++ b/drivers/gpu/drm/bridge/analogix/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_ANALOGIX_ANX7625) += anx7625.o analogix_dp-objs := analogix_dp_core.o analogix_dp_reg.o obj-$(CONFIG_DRM_ANALOGIX_DP) += analogix_dp.o diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c new file mode 100644 index 000..5d6bc5f --- /dev/null +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -0,0 +1,2086 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright(c) 2016, Analogix Semiconductor. All rights reserved. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "anx7625.h" + +/* + * there is a sync issue while access I2C register between AP(CPU) and + * internal firmware(OCM), to avoid the race condition, AP should access + * the reserved slave address before slave address occurs changes. 
+ */ +static int i2c_access_workaround(struct anx7625_data *ctx, +struct i2c_client *client) +{ + u8 offset; + struct device *dev = &client->dev; + struct i2c_client *last_client = ctx->last_client; + int ret = 0; + + if (client != last_client) { + ctx->last_client = client; + + if (client == ctx->i2c.tcpc_client) + offset = RSVD_00_ADDR; + else if (client == ctx->i2c.tx_p0_client) + offset = RSVD_D1_ADDR; + else if (client == ctx->i2c.tx_p1_client) + offset = RSVD_60_ADDR; + else if (client == ctx->i2c.rx_p0_client) + offset = RSVD_39_ADDR; + else if (client == ctx->i2c.rx_p1_client) + offset = RSVD_7F_ADDR; + else + offset = RSVD_00_ADDR; + + ret = i2c_smbus_write_byte_data(client, offset, 0x00); + if (ret < 0) + DRM_DEV_ERROR(dev, + "failed to access i2c id=%x\n:%x", + client->addr, offset); + } + + return ret; +} + +static int anx7625_reg_read(struct anx7625_data *ctx, + struct i2c_client *client, u8 reg_addr) +{ + int ret; + struct device *dev = &client->dev; + + i2c_access_workaround(ctx, client); + + ret = i2c_smbus_read_byte_data(client, reg_addr); + if (ret < 0) + DRM_DEV_ERROR(dev, "read i2c failed id=%x:%x\n", + client->addr, reg_addr); + + return ret; +} + +static int anx7625_reg_block_read(struct anx7625_data *ctx, + struct i2c_client *client, + u8 reg_addr, u8 len, u8 *buf) +
Re: Usecases for the per-task latency-nice attribute
On 9/18/19 7:48 PM, Patrick Bellasi wrote: > > On Wed, Sep 18, 2019 at 13:41:04 +0100, Parth Shah wrote... > >> Hello everyone, > > Hi Parth, > thanks for staring this discussion. > > [ + patrick.bell...@matbug.net ] my new email address, since with > @arm.com I will not be reachable anymore starting next week. > Noted. I will send new version with the summary of all the discussion and add more people to CC. Will change your mail in that, thanks for notifying me. >> As per the discussion in LPC2019, new per-task property like latency-nice >> can be useful in certain scenarios. The scheduler can take proper decision >> by knowing latency requirement of a task from the end-user itself. >> >> There has already been an effort from Subhra for introducing Task >> latency-nice [1] values and have seen several possibilities where this type >> of >> interface can be used. >> >> From the best of my understanding of the discussion on the mail thread and >> in the LPC2019, it seems that there are two dilemmas; >> >> 1. Name: What should be the name for such attr for all the possible usecases? >> = >> Latency nice is the proposed name as of now where the lower value indicates >> that the task doesn't care much for the latency > > If by "lower value" you mean -19 (in the proposed [-20,19] range), then > I think the meaning should be the opposite. > Oops, my bad. i wanted to tell higher value but somehow missed that latency-nice should be the opposite to the latency sensitivity. But in the further scope of the discussion, I mean -19 to be the least value (latency sensitive) and +20 to be the greatest value(does not care for latency) if range is [-19,20] > A -19 latency-nice task is a task which is not willing to give up > latency. For those tasks for example we want to reduce the wake-up > latency at maximum. > > This will keep its semantic aligned to that of process niceness values > which range from -20 (most favourable to the process) to 19 (least > favourable to the process). 
Totally agreed upon. > >> and we can spend some more time in the kernel to decide a better >> placement of a task (to save time, energy, etc.) > > Tasks with an high latency-nice value (e.g. 19) are "less sensible to > latency". These are tasks we wanna optimize mainly for throughput and > thus, for example, we can spend some more time to find out a better task > placement at wakeup time. > > Does that makes sense? Correct. Task placement is one way to optimize which can benefit to both the server and embedded world by saving power without compromising much on performance. > >> But there seems to be a bit of confusion on whether we want biasing as well >> (latency-biased) or something similar, in which case "latency-nice" may >> confuse the end-user. > > AFAIU PeterZ point was "just" that if we call it "-nice" it has to > behave as "nice values" to avoid confusions to users. But, if we come up > with a different naming maybe we will have more freedom. > > Personally, I like both "latency-nice" or "latency-tolerant", where: > > - latency-nice: >should have a better understanding based on pre-existing concepts > > - latency-tolerant: >decouples a bit its meaning from the niceness thus giving maybe a bit >more freedom in its complete definition and perhaps avoid any >possible interpretation confusion like the one I commented above. > > Fun fact: there was also the latency-nasty proposal from PaulMK :) > Cool. In that sense, latency-tolerant seems to be more flexible covering multiple functionality that a scheduler can provide with such userspace hints. >> 2. Value: What should be the range of possible values supported by this new >> attr? >> == >> The possible values of such task attribute still need community attention. >> Do we need a range of values or just binary/ternary values are sufficient? >> Also signed or unsigned and so the length of the variable (u64, s32, >> etc)? 
> > AFAIR, the proposal on the table are essentially two: > > A) use a [-20,19] range > > Which has similarities with the niceness concept and gives a minimal > continuous range. This can be on hand for things like scaling the > vruntime normalization [3] > > B) use some sort of "profile tagging" > e.g. background, latency-sensible, etc... > > If I correctly got what PaulT was proposing toward the end of the > discussion at LPC. > If I got it right, then for option B, we can have this attr to be used as a latency_flag just like per-process flags (e.g. PF_IDLE). If so, then we can piggyback on the p->flags itself, hence I will prefer the range unless we have multiple usecases which can not get best out of the range. > This last option deserves better exploration. > > At first glance I'm more for option A, I see a range as something that: > > - gives us a bit of flexibility in terms of the possible internal > usages of the actual value > > - better suppor
Re: [PATCH] iwlwifi: fix a potential NULL pointer dereference
On Wed, 2019-09-18 at 23:41 +0530, Allen Pais wrote: > alloc_workqueue is not checked for errors and as a result, > a potential NULL dereference could occur. Wonder why this is coming out now ... but I don't think kmalloc() was ever 'fixed' to fail for small allocations, so I guess this will never fail? Anyway, as 0-day bot pointed out, this isn't really right. The cleanup paths here are also tricky, so I arrived at this patch a few days ago: diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index eb544811759d..882fdf7e5e7b 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3530,6 +3530,15 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, spin_lock_init(&trans_pcie->reg_lock); mutex_init(&trans_pcie->mutex); init_waitqueue_head(&trans_pcie->ucode_write_waitq); + + trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator", + WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!trans_pcie->rba.alloc_wq) { + ret = -ENOMEM; + goto out_free_trans; + } + INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work); + trans_pcie->tso_hdr_page = alloc_percpu(struct iwl_tso_hdr_page); if (!trans_pcie->tso_hdr_page) { ret = -ENOMEM; @@ -3664,10 +3673,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, trans_pcie->inta_mask = CSR_INI_SET_MASK; } - trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator", - WQ_HIGHPRI | WQ_UNBOUND, 1); - INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work); - #ifdef CPTCFG_IWLWIFI_DEBUGFS trans_pcie->fw_mon_data.state = IWL_FW_MON_DBGFS_STATE_CLOSED; mutex_init(&trans_pcie->fw_mon_data.mutex); @@ -3681,6 +3686,8 @@ out_free_ict: iwl_pcie_free_ict(trans); out_no_pci: free_percpu(trans_pcie->tso_hdr_page); + destroy_workqueue(trans_pcie->rba.alloc_wq); +out_free_trans: iwl_trans_free(trans); return ERR_PTR(ret); } johannes
Re: [PATCH] mm: Support memblock alloc on the exact node for sparse_buffer_init()
On 2019/9/19 12:47, Mike Rapoport wrote: > Hi, > > On Wed, Sep 18, 2019 at 12:22:29PM +0800, Yunfeng Ye wrote: >> Currently, when memblock_find_in_range_node() fail on the exact node, it >> will use %NUMA_NO_NODE to find memblock from other nodes. At present, >> the work is good, but when the large memory is insufficient and the >> small memory is enough, we want to allocate the small memory of this >> node first, and do not need to allocate large memory from other nodes. >> >> In sparse_buffer_init(), it will prepare large chunks of memory for page >> structure. The page management structure requires a lot of memory, but >> if the node does not have enough memory, it can be converted to a small >> memory allocation without having to allocate it from other nodes. >> >> Add %MEMBLOCK_ALLOC_EXACT_NODE flag for this situation. Normally, the >> behavior is the same with %MEMBLOCK_ALLOC_ACCESSIBLE, only that it will >> not allocate from other nodes when a single node fails to allocate. >> >> If large contiguous block memory allocated fail in sparse_buffer_init(), >> it will allocates small block memmory section by section later. > > Did you see the sparse_buffer_init() actually falling back to allocate from a > different node? If a node does not have enough memory to hold it's own > memory map, filling only it with parts of the memory map will not make such > node usable. > Normally, it won't happen that sparse_buffer_init() falling back from a different node, because page structure size is 64 bytes per 4KB of memory, no more than 2% of total available memory. But in the special cases, for eaxmple, memory address is isolated by BIOS when memory failure, split the total memory many pieces, although we have enough memory, but no large contiguous block memory in one node. 
sparse_buffer_init() needs a large contiguous block of memory to be allocated in one go. E.g., if the size of memory is 1TB, sparse_buffer_init() needs 1TB * 64/4096 = 16GB, but if we have 100 blocks of memory and each block has only 10GB, then although the total memory is almost 100*10GB=1TB, there is no contiguous 16GB block. Before commit 2a3cb8baef71 ("mm/sparse: delete old sparse_init and enable new one"), we had the %CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER config to handle this situation; after that commit, it falls back to allocating memory from other nodes, which has a performance impact due to remote NUMA access. Commit 85c77f791390 ("mm/sparse: add new sparse_init_nid() and sparse_init()") wrote that: " sparse_init_nid(), which only operates within one memory node, and thus allocates memory either in large contiguous block or allocates section by section " It means that allocating section by section is a normal choice too, so I think adding %MEMBLOCK_ALLOC_EXACT_NODE is also a choice for this situation. In most cases, sparse_buffer_init() works well and does not allocate from other nodes at present. thanks. 
Yunfeng Ye >> Signed-off-by: Yunfeng Ye >> --- >> include/linux/memblock.h | 1 + >> mm/memblock.c| 3 ++- >> mm/sparse.c | 2 +- >> 3 files changed, 4 insertions(+), 2 deletions(-) >> >> diff --git a/include/linux/memblock.h b/include/linux/memblock.h >> index f491690..9a81d9c 100644 >> --- a/include/linux/memblock.h >> +++ b/include/linux/memblock.h >> @@ -339,6 +339,7 @@ static inline int memblock_get_region_node(const struct >> memblock_region *r) >> #define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) >> #define MEMBLOCK_ALLOC_ACCESSIBLE 0 >> #define MEMBLOCK_ALLOC_KASAN1 >> +#define MEMBLOCK_ALLOC_EXACT_NODE 2 >> >> /* We are using top down, so it is safe to use 0 here */ >> #define MEMBLOCK_LOW_LIMIT 0 >> diff --git a/mm/memblock.c b/mm/memblock.c >> index 7d4f61a..dbd52c3c 100644 >> --- a/mm/memblock.c >> +++ b/mm/memblock.c >> @@ -277,6 +277,7 @@ static phys_addr_t __init_memblock >> memblock_find_in_range_node(phys_addr_t size, >> >> /* pump up @end */ >> if (end == MEMBLOCK_ALLOC_ACCESSIBLE || >> +end == MEMBLOCK_ALLOC_EXACT_NODE || >> end == MEMBLOCK_ALLOC_KASAN) >> end = memblock.current_limit; >> >> @@ -1365,7 +1366,7 @@ static phys_addr_t __init >> memblock_alloc_range_nid(phys_addr_t size, >> if (found && !memblock_reserve(found, size)) >> goto done; >> >> -if (nid != NUMA_NO_NODE) { >> +if (end != MEMBLOCK_ALLOC_EXACT_NODE && nid != NUMA_NO_NODE) { >> found = memblock_find_in_range_node(size, align, start, >> end, NUMA_NO_NODE, >> flags); >> diff --git a/mm/sparse.c b/mm/sparse.c >> index 72f010d..828db46 100644 >> --- a/mm/sparse.c >> +++ b/mm/sparse.c >> @@ -477,7 +477,7 @@ static void __init sparse_buffer_init(unsigned long >> size, int nid) >> sparsemap_buf = >> memblock_alloc_try_nid_raw(size, PAGE_SIZE, >> addr, >> -
Re: [PATCH 2/3] arm64: dts: imx8mm: Use correct clock for usdhc's ipg clk
Hi Anson, I have a question, that is not directly related to this patch. I see that for the usdhc1 and usdhc3 nodes, there is an 'assigned-clock' and 'assigned-clock-rates' property but not for usdhc2. The same applies to the mx8mq and mx8mn dtsi file. Is there any reason for this? If not can you fix it? Thanks, Frieder On 19.09.19 07:05, Anson Huang wrote: > On i.MX8MM, usdhc's ipg clock is from IMX8MM_CLK_IPG_ROOT, > assign it explicitly instead of using IMX8MM_CLK_DUMMY. > > Signed-off-by: Anson Huang > --- > arch/arm64/boot/dts/freescale/imx8mm.dtsi | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi > b/arch/arm64/boot/dts/freescale/imx8mm.dtsi > index 7c4dcce..8aafad2 100644 > --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi > +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi > @@ -694,7 +694,7 @@ > compatible = "fsl,imx8mm-usdhc", > "fsl,imx7d-usdhc"; > reg = <0x30b4 0x1>; > interrupts = ; > - clocks = <&clk IMX8MM_CLK_DUMMY>, > + clocks = <&clk IMX8MM_CLK_IPG_ROOT>, ><&clk IMX8MM_CLK_NAND_USDHC_BUS>, ><&clk IMX8MM_CLK_USDHC1_ROOT>; > clock-names = "ipg", "ahb", "per"; > @@ -710,7 +710,7 @@ > compatible = "fsl,imx8mm-usdhc", > "fsl,imx7d-usdhc"; > reg = <0x30b5 0x1>; > interrupts = ; > - clocks = <&clk IMX8MM_CLK_DUMMY>, > + clocks = <&clk IMX8MM_CLK_IPG_ROOT>, ><&clk IMX8MM_CLK_NAND_USDHC_BUS>, ><&clk IMX8MM_CLK_USDHC2_ROOT>; > clock-names = "ipg", "ahb", "per"; > @@ -724,7 +724,7 @@ > compatible = "fsl,imx8mm-usdhc", > "fsl,imx7d-usdhc"; > reg = <0x30b6 0x1>; > interrupts = ; > - clocks = <&clk IMX8MM_CLK_DUMMY>, > + clocks = <&clk IMX8MM_CLK_IPG_ROOT>, ><&clk IMX8MM_CLK_NAND_USDHC_BUS>, ><&clk IMX8MM_CLK_USDHC3_ROOT>; > clock-names = "ipg", "ahb", "per"; >
Re: [PATCH] gpio: mxc: Only getting second IRQ when there is more than one IRQ
czw., 19 wrz 2019 o 08:10 Anson Huang napisał(a): > > On some of i.MX SoCs like i.MX8QXP, there is ONLY one IRQ for each > GPIO bank, so it is better to check the IRQ count before getting > second IRQ to avoid below error message during probe: > > [1.070908] gpio-mxc 5d08.gpio: IRQ index 1 not found > [1.077420] gpio-mxc 5d09.gpio: IRQ index 1 not found > [1.083766] gpio-mxc 5d0a.gpio: IRQ index 1 not found > [1.090122] gpio-mxc 5d0b.gpio: IRQ index 1 not found > [1.096470] gpio-mxc 5d0c.gpio: IRQ index 1 not found > [1.102804] gpio-mxc 5d0d.gpio: IRQ index 1 not found > [1.109144] gpio-mxc 5d0e.gpio: IRQ index 1 not found > [1.115475] gpio-mxc 5d0f.gpio: IRQ index 1 not found > > Signed-off-by: Anson Huang > --- > drivers/gpio/gpio-mxc.c | 12 +--- > 1 file changed, 9 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c > index 7907a87..39ba7dd 100644 > --- a/drivers/gpio/gpio-mxc.c > +++ b/drivers/gpio/gpio-mxc.c > @@ -426,9 +426,15 @@ static int mxc_gpio_probe(struct platform_device *pdev) > if (IS_ERR(port->base)) > return PTR_ERR(port->base); > > - port->irq_high = platform_get_irq(pdev, 1); > - if (port->irq_high < 0) > - port->irq_high = 0; > + err = platform_irq_count(pdev); > + if (err < 0) > + return err; > + > + if (err > 1) { Could you use a variable called irq_count or something here? This 'err' is a confusing name for a variable that contains a valid value. Bart > + port->irq_high = platform_get_irq(pdev, 1); > + if (port->irq_high < 0) > + port->irq_high = 0; > + } > > port->irq = platform_get_irq(pdev, 0); > if (port->irq < 0) > -- > 2.7.4 >
Re: [PATCH] mfd: mt6360: add pmic mt6360 driver
On Thu, 19 Sep 2019, Gene Chen wrote: > Lee Jones 於 2019年9月18日 週三 下午6:51寫道: > > > > On Wed, 18 Sep 2019, Gene Chen wrote: > > > > > From: Gene Chen > > > > > > Add mfd driver for mt6360 pmic chip include > > > Battery Charger/USB_PD/Flash LED/RGB LED/LDO/Buck > > > > > > Signed-off-by: Gene Chen > > --- > > > > This looks different from the one you sent before, but I don't see a > > version bump or any changelog in this space. Please re-submit with > > the differences noted. > > > > the change is > 1. add missing include file > 2. modify commit message > > this patch is regarded as version 1 It's different to the first one you sent to the list, so it needs a version bump and a change log. There also appears to still be issues with it, if the auto-builders are to be believed. Do ensure you thoroughly test your patches before sending upstream. Please fix the issues and resubmit your v3 with a nice changelog. > > > drivers/mfd/Kconfig| 12 + > > > drivers/mfd/Makefile | 1 + > > > drivers/mfd/mt6360-core.c | 463 > > > + > > > include/linux/mfd/mt6360-private.h | 279 ++ > > > include/linux/mfd/mt6360.h | 33 +++ > > > 5 files changed, 788 insertions(+) > > > create mode 100644 drivers/mfd/mt6360-core.c > > > create mode 100644 include/linux/mfd/mt6360-private.h > > > create mode 100644 include/linux/mfd/mt6360.h > > -- Lee Jones [李琼斯] Linaro Services Technical Lead Linaro.org │ Open source software for ARM SoCs Follow Linaro: Facebook | Twitter | Blog
[PATCH 1/1] sched/eas: introduce system-wide overutil indicator
When the system is overutilization, the load-balance crossing clusters will be triggered and scheduler will not use energy aware scheduling to choose CPUs. The overutilization means the loading of ANY CPUs exceeds threshold (80%). However, only 1 heavy task or while-1 program will run on highest capacity CPUs and it still result to trigger overutilization. So the system will not use Energy Aware scheduling. To avoid it, a system-wide over-utilization indicator to trigger load-balance cross clusters. The policy is: The loading of "ALL CPUs in the highest capacity" exceeds threshold(80%) or The loading of "Any CPUs not in the highest capacity" exceed threshold(80%) Signed-off-by: YT Chang --- kernel/sched/fair.c | 76 + 1 file changed, 65 insertions(+), 11 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 036be95..f4c3d70 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5182,10 +5182,71 @@ static inline bool cpu_overutilized(int cpu) static inline void update_overutilized_status(struct rq *rq) { if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu)) { - WRITE_ONCE(rq->rd->overutilized, SG_OVERUTILIZED); - trace_sched_overutilized_tp(rq->rd, SG_OVERUTILIZED); + if (capacity_orig_of(cpu_of(rq)) < rq->rd->max_cpu_capacity) { + WRITE_ONCE(rq->rd->overutilized, SG_OVERUTILIZED); + trace_sched_overutilized_tp(rq->rd, SG_OVERUTILIZED); + } } } + +static +void update_system_overutilized(struct sched_domain *sd, struct cpumask *cpus) +{ + unsigned long group_util; + bool intra_overutil = false; + unsigned long max_capacity; + struct sched_group *group = sd->groups; + struct root_domain *rd; + int this_cpu; + bool overutilized; + int i; + + this_cpu = smp_processor_id(); + rd = cpu_rq(this_cpu)->rd; + overutilized = READ_ONCE(rd->overutilized); + max_capacity = rd->max_cpu_capacity; + + do { + group_util = 0; + for_each_cpu_and(i, sched_group_span(group), cpus) { + group_util += cpu_util(i); + if (cpu_overutilized(i)) { + if 
(capacity_orig_of(i) < max_capacity) { + intra_overutil = true; + break; + } + } + } + + /* +* A capacity base hint for over-utilization. +* Not to trigger system overutiled if heavy tasks +* in Big.cluster, so +* add the free room(20%) of Big.cluster is impacted which means +* system-wide over-utilization, +* that considers whole cluster not single cpu +*/ + if (group->group_weight > 1 && (group->sgc->capacity * 1024 < + group_util * capacity_margin)) { + intra_overutil = true; + break; + } + + group = group->next; + + } while (group != sd->groups && !intra_overutil); + + if (overutilized != intra_overutil) { + if (intra_overutil == true) { + WRITE_ONCE(rd->overutilized, SG_OVERUTILIZED); + trace_sched_overutilized_tp(rd, SG_OVERUTILIZED); + } else { + WRITE_ONCE(rd->overutilized, 0); + trace_sched_overutilized_tp(rd, 0); + } + } +} + #else static inline void update_overutilized_status(struct rq *rq) { } #endif @@ -8242,15 +8303,6 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd /* update overload indicator if we are at root domain */ WRITE_ONCE(rd->overload, sg_status & SG_OVERLOAD); - - /* Update over-utilization (tipping point, U >= 0) indicator */ - WRITE_ONCE(rd->overutilized, sg_status & SG_OVERUTILIZED); - trace_sched_overutilized_tp(rd, sg_status & SG_OVERUTILIZED); - } else if (sg_status & SG_OVERUTILIZED) { - struct root_domain *rd = env->dst_rq->rd; - - WRITE_ONCE(rd->overutilized, SG_OVERUTILIZED); - trace_sched_overutilized_tp(rd, SG_OVERUTILIZED); } } @@ -8476,6 +8528,8 @@ static struct sched_group *find_busiest_group(struct lb_env *env) */ update_sd_lb_stats(env, &sds); + update_system_overutilized(env->sd, env->cpus); + if (sched_energy_enabled()) { struct root_domain *rd = env->dst_rq->rd; -- 1.9.1
Re: [PATCH v2 1/2] edac: Add an API for edac device to report for multiple errors
On 9/19/2019 9:33 AM, Robert Richter wrote: On 12.09.19 15:53:04, Hanna Hawa wrote: Add an API for EDAC device to report multiple errors with same type. Signed-off-by: Hanna Hawa --- drivers/edac/edac_device.c | 91 ++ drivers/edac/edac_device.h | 40 + 2 files changed, 131 insertions(+) diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 65cf2b9355c4..78ac44103acc 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -643,3 +643,94 @@ void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, block ? block->name : "N/A", msg); } EXPORT_SYMBOL_GPL(edac_device_handle_ue); + +void __edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, +unsigned int count, int inst_nr, int block_nr, +const char *msg) +{ Please do not add a copy here, instead modify the existing function and share the code with both, old and new functions. Will be fixed. Thanks, -Robert
[PATCH v2 0/3] dmaengine: sf-pdma: Add platform dma driver
Add PDMA driver support for SiFive HiFive Unleashed RevA00 board. Mainly follows DMAengine controller doc[1] to implement and take other DMA drivers as reference. Such as - drivers/dma/fsl-edma.c - drivers/dma/dw-edma/ - drivers/dma/pxa-dma.c Using DMA test client[2] to test. Detailed datasheet is doc[3]. Driver supports: - 4 physical DMA channels, share same DONE and error interrupt handler. - Support MEM_TO_MEM - Tested by DMA test client - patches include DT bindings document and dts for fu540-c000 SoC. Separate dts patch for easier review and apply to different branch or SoC platform. - retry 1 time if DMA error occurs. [Reference Doc] [1] ./Documentation/driver-api/dmaengine/provider.rst [2] ./Documentation/driver-api/dmaengine/dmatest.rst [3] https://static.dev.sifive.com/FU540-C000-v1.0.pdf [Simple steps to test with the DMA test client] $ echo 1 > /sys/module/dmatest/parameters/iterations $ echo dma0chan0 > /sys/module/dmatest/parameters/channel $ echo dma0chan1 > /sys/module/dmatest/parameters/channel $ echo dma0chan2 > /sys/module/dmatest/parameters/channel $ echo dma0chan3 > /sys/module/dmatest/parameters/channel $ echo 1 > /sys/module/dmatest/parameters/run Green Wan (3): dt-bindings: dmaengine: sf-pdma: add bindings for SiFive PDMA riscv: dts: add support for PDMA device of HiFive Unleashed Rev A00 dmaengine: sf-pdma: add platform DMA support for HiFive Unleashed A00 .../bindings/dma/sifive,fu540-c000-pdma.yaml | 63 ++ MAINTAINERS | 6 + arch/riscv/boot/dts/sifive/fu540-c000.dtsi| 7 + drivers/dma/Kconfig | 2 + drivers/dma/Makefile | 1 + drivers/dma/sf-pdma/Kconfig | 6 + drivers/dma/sf-pdma/Makefile | 1 + drivers/dma/sf-pdma/sf-pdma.c | 623 ++ drivers/dma/sf-pdma/sf-pdma.h | 124 9 files changed, 833 insertions(+) create mode 100644 Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml create mode 100644 drivers/dma/sf-pdma/Kconfig create mode 100644 drivers/dma/sf-pdma/Makefile create mode 100644 drivers/dma/sf-pdma/sf-pdma.c create mode 100644 
drivers/dma/sf-pdma/sf-pdma.h -- 2.17.1
[PATCH v2 1/3] dt-bindings: dmaengine: sf-pdma: add bindings for SiFive PDMA
Add DT bindings document for Platform DMA(PDMA) driver of board, HiFive Unleashed Rev A00. Signed-off-by: Green Wan --- .../bindings/dma/sifive,fu540-c000-pdma.yaml | 63 +++ MAINTAINERS | 5 ++ 2 files changed, 68 insertions(+) create mode 100644 Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml diff --git a/Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml b/Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml new file mode 100644 index ..b5423f1cfcaf --- /dev/null +++ b/Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dma/sifive,fu540-c000-pdma.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: SiFive Unleashed Rev C000 Platform DMA + +maintainers: + - Green Wan + - Palmer Dabbelt + - Paul Walmsley + +description: | + Platform DMA is a DMA engine of SiFive Unleashed. It supports 4 + channels. Each channel has 2 interrupts. One is for DMA done and + the other is for DMA error. + + In different SoC, DMA could be attached to different IRQ line. + DT file need to be changed to meet the difference. For technical + doc, + + https://static.dev.sifive.com/FU540-C000-v1.0.pdf + +properties: + compatible: +items: + - const: sifive,fu540-c000-pdma + + reg: +maxItems: 1 + + interrupts: +minItems: 8 +maxItems: 8 + + interrupt-parent: +description: + Interrupt parent must correspond to the name PLIC interrupt + controller, i.e. "plic0" +maxItems: 1 + + '#dma-cells': +const: 1 + +required: + - compatible + - reg + - interrupt-parent + - interrupts + - '#dma-cells' + +examples: + - | +dma@300 { + compatible = "sifive,fu540-c000-pdma"; + reg = <0x0 0x300 0x0 0x8000>; + interrupt-parent = <&plic0>; + interrupts = <23 24 25 26 27 28 29 30>; + #dma-cells = <1>; +}; + +... 
diff --git a/MAINTAINERS b/MAINTAINERS index 49f75d1b7b51..d0caa09a479e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14591,6 +14591,11 @@ F: drivers/media/usb/siano/ F: drivers/media/usb/siano/ F: drivers/media/mmc/siano/ +SIFIVE PDMA DRIVER +M: Green Wan +S: Maintained +F: Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml + SIFIVE DRIVERS M: Palmer Dabbelt M: Paul Walmsley -- 2.17.1
Re: Linux 5.3-rc8
Dear Lennart. Lennart Poettering - 18.09.19, 15:53:25 CEST: > On Mi, 18.09.19 00:10, Martin Steigerwald (mar...@lichtvoll.de) wrote: > > > getrandom() will never "consume entropy" in a way that will block > > > any > > > users of getrandom(). If you don't have enough collected entropy > > > to > > > seed the rng, getrandom() will block. If you do, getrandom() will > > > generate as many numbers as you ask it to, even if no more entropy > > > is > > > ever collected by the system. So it doesn't matter how many > > > clients > > > you have calling getrandom() in the boot process - either there'll > > > be > > > enough entropy available to satisfy all of them, or there'll be > > > too > > > little to satisfy any of them. > > > > Right, but then Systemd would not use getrandom() for initial > > hashmap/ UUID stuff since it > > Actually things are more complex. In systemd there are four classes of > random values we need: > > 1. High "cryptographic" quality. There are very few needs for this in […] > 2. High "non-cryptographic" quality. This is used for example for […] > 3. Medium quality. This is used for seeding hash tables. These may be […] > 4. Crap quality. There are only a few uses of this, where rand_r() is >is OK. > > Of these four case, the first two might block boot. Because the first > case is not common you won't see blocking that often though for > them. The second case is very common, but since we use RDRAND you > won't see it on any recent Intel machines. > > Or to say this all differently: the hash table seeding and the uuid > case are two distinct cases in systemd, and I am sure they should be. Thank you very much for your summary of uses of random numbers in Systemd and also for your other mail that "neither RDRAND nor /dev/ urandom know a concept of of "depleting entropy"". I thought they would deplete entropy needed to the initial seeding of crng. 
Thank you also for taking part in this discussion, even if someone put your mail address on carbon copy without asking first. I do not claim I understand enough of this random number stuff. But I feel it's important that kernel and userspace developers actually talk with each other about a sane approach for it. And I believe that the complexity involved is part of the issue. I feel an API for obtaining random numbers with different quality levels needs to be much, much, much simpler to use *properly*. I felt a bit overwhelmed by the discussion (and by what else is happening in my life, just having come back from holding a Linux performance workshop in front of about two dozen people), so I intend to step back from it. If one of my mails actually helped to encourage or facilitate kernel space and user space developers talking with each other about a sane approach to random numbers, then I may have used my soft skills in a way that brings some benefit. For the technical aspects, certainly people are taking part in this discussion who are much, much deeper into the intricacies of entropy in Linux and computers in general, so I just hope for a good outcome. Best, -- Martin
Re: [PATCH v2 3/3] dt-bindings: ddr: Add bindings for Samsung LPDDR3 memories
On Thu, 19 Sep 2019 at 08:49, Lukasz Luba wrote: > > Hi Krzysztof, > > On 9/18/19 8:51 PM, Krzysztof Kozlowski wrote: > > On Mon, 16 Sep 2019 at 12:07, Lukasz Luba > > wrote: > >> > >> Add compatible for Samsung k3qf2f20db LPDDR3 memory bindings. > >> Introduce minor fixes in the old documentation. > >> > >> Signed-off-by: Lukasz Luba > >> --- > >> Documentation/devicetree/bindings/ddr/lpddr3.txt | 9 ++--- > >> 1 file changed, 6 insertions(+), 3 deletions(-) > >> > >> diff --git a/Documentation/devicetree/bindings/ddr/lpddr3.txt > >> b/Documentation/devicetree/bindings/ddr/lpddr3.txt > >> index 3b2485b84b3f..49afe794daaa 100644 > >> --- a/Documentation/devicetree/bindings/ddr/lpddr3.txt > >> +++ b/Documentation/devicetree/bindings/ddr/lpddr3.txt > >> @@ -1,7 +1,9 @@ > >> * LPDDR3 SDRAM memories compliant to JEDEC JESD209-3C > >> > >> Required properties: > >> -- compatible : Should be - "jedec,lpddr3" > >> +- compatible : should be one of the following: > >> + Generic default - "jedec,lpddr3". > > > > The convention is first compatible, then description. I gave you the > > example to base on - at25. Why making it different? > > I have checked at25 that you pointed me to and also checked at24, which > has a bit longer "compatible" section. > > I found that there are many "jedec,spi-nor" compatible devices, which I > thought would be a better example for my "jedec,lpddr3". > For example, two configurations, where you have a single labels or dual > (with specific device) > arch/arm/boot/dts/imx6dl-rex-basic.dts: > compatible = "sst,sst25vf016b", "jedec,spi-nor"; > arch/arm/boot/dts/imx6q-ba16.dtsi: > compatible = "jedec,spi-nor"; > > The 'compatible' in documentation for the "jedec,spi-nor" is slightly > different (similar to at24). > Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt > It has a long explanation, which is also OK. So I thought that it is > quite flexible what you put in there. 
It is flexible but I see a clear pattern in existing sources: jedec,spi-nor.txt compatible : May include a device-specific .. ... Supported chip names: at25df321a ... at25.txt: - compatible : Should be "<vendor>,<type>", and generic value "atmel,at25". Example "<vendor>,<type>" values: "anvo,anv32e61w" "microchip,25lc040" In these cases the doc says that "compatible should be" and then you have the list of values. Your example says that the compatible should be "Generic default" or "For Samsung 542x SoC"... :) The difference is slight but putting the value first is a simple and elegant solution. In your case one has to go to the end of the sentence to find the most important information - the compatible value. > I have also checked Cadance QSPI controller. > Documentation/devicetree/bindings/mtd/cadence-quadspi.txt > The controller might be built-in into different vendor SoC's > and the "compatible" is ready to reflect it in similar fashion but > with a short explanation in this section. I see. I do not find this pattern as readable as jedec-spi-nor or at25, therefore I mentioned them as an example to base on ("Exactly the same as AT24 or AT25 EEPROM bindings."). We can also avoid this entire discussion with YAML (which also follows the approach of at25 - value first). > Therefore, what you see in the patch draw heavily on Cadence's qspi, > with a bit of inspiration from jedec,spi-nor usage. > > Should I change it to at25 "compatible" style and send next patch? Yes, please. Or go to YAML and make the entire discussion obsolete. Best regards, Krzysztof
[PATCH v2 2/3] riscv: dts: add support for PDMA device of HiFive Unleashed Rev A00
Add PDMA support to (arch/riscv/boot/dts/sifive/fu540-c000.dtsi) Signed-off-by: Green Wan --- arch/riscv/boot/dts/sifive/fu540-c000.dtsi | 7 +++ 1 file changed, 7 insertions(+) diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi index 42b5ec223100..d3030d7fb45c 100644 --- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi @@ -164,6 +164,13 @@ clocks = <&prci PRCI_CLK_TLCLK>; status = "disabled"; }; + dma: dma@300 { + compatible = "sifive,fu540-c000-pdma"; + reg = <0x0 0x300 0x0 0x8000>; + interrupt-parent = <&plic0>; + interrupts = <23 24 25 26 27 28 29 30>; + #dma-cells = <1>; + }; uart1: serial@10011000 { compatible = "sifive,fu540-c000-uart", "sifive,uart0"; reg = <0x0 0x10011000 0x0 0x1000>; -- 2.17.1
[PATCH v2 3/3] dmaengine: sf-pdma: add platform DMA support for HiFive Unleashed A00
Link: https://www.kernel.org/doc/html/v4.17/driver-api/dmaengine/ Link: https://static.dev.sifive.com/FU540-C000-v1.0.pdf Add PDMA driver, sf-pdma, to enable DMA engine on HiFive Unleashed Rev A00 board. - Implement dmaengine APIs, support MEM_TO_MEM async copy. - Tested by DMA Test client - Supports 4 channels DMA, each channel has 1 done and 1 err interrupt connected to platform-level interrupt controller (PLIC). - Depends on DMA_ENGINE and DMA_VIRTUAL_CHANNELS Follow the DMAengine controller doc, "./Documentation/driver-api/dmaengine/provider.rst" to implement DMA engine. And use the dma test client in doc, "./Documentation/driver-api/dmaengine/dmatest.rst", to test. Each DMA channel has separate HW regs and support done and error ISRs. 4 channels share 1 done and 1 err ISRs. There's no expander/arbitrator in DMA HW. -- -- ||--< done 23 >--|ch 0| ||--< err 24 >--|| (dma0chan0) || -- || -- ||--< done 25 >--|ch 1| ||--< err 26 >--|| (dma0chan1) |PLIC| -- || -- ||--< done 27 >--|ch 2| ||--< err 28 >--|| (dma0chan2) || -- || -- ||--< done 29 >--|ch 3| ||--< err 30 >--|| (dma0chan3) -- -- Signed-off-by: Green Wan --- MAINTAINERS | 1 + drivers/dma/Kconfig | 2 + drivers/dma/Makefile | 1 + drivers/dma/sf-pdma/Kconfig | 6 + drivers/dma/sf-pdma/Makefile | 1 + drivers/dma/sf-pdma/sf-pdma.c | 623 ++ drivers/dma/sf-pdma/sf-pdma.h | 124 +++ 7 files changed, 758 insertions(+) create mode 100644 drivers/dma/sf-pdma/Kconfig create mode 100644 drivers/dma/sf-pdma/Makefile create mode 100644 drivers/dma/sf-pdma/sf-pdma.c create mode 100644 drivers/dma/sf-pdma/sf-pdma.h diff --git a/MAINTAINERS b/MAINTAINERS index d0caa09a479e..c5f0662c9106 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14594,6 +14594,7 @@ F: drivers/media/mmc/siano/ SIFIVE PDMA DRIVER M: Green Wan S: Maintained +F: drivers/dma/sf-pdma/ F: Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml SIFIVE DRIVERS diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 413efef5fbb6..f05a928f5a3d 100644 
--- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -667,6 +667,8 @@ source "drivers/dma/qcom/Kconfig" source "drivers/dma/dw/Kconfig" +source "drivers/dma/sf-pdma/Kconfig" + source "drivers/dma/dw-edma/Kconfig" source "drivers/dma/hsu/Kconfig" diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 5bddf6f8790f..77a552d970ae 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_DMA_SUN4I) += sun4i-dma.o obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o obj-$(CONFIG_DW_AXI_DMAC) += dw-axi-dmac/ obj-$(CONFIG_DW_DMAC_CORE) += dw/ +obj-$(CONFIG_SF_PDMA) += sf-pdma/ obj-$(CONFIG_DW_EDMA) += dw-edma/ obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o obj-$(CONFIG_FSL_DMA) += fsldma.o diff --git a/drivers/dma/sf-pdma/Kconfig b/drivers/dma/sf-pdma/Kconfig new file mode 100644 index ..0e01a5728a79 --- /dev/null +++ b/drivers/dma/sf-pdma/Kconfig @@ -0,0 +1,6 @@ +config SF_PDMA + bool "Sifive PDMA controller driver" + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support the SiFive PDMA controller. 
diff --git a/drivers/dma/sf-pdma/Makefile b/drivers/dma/sf-pdma/Makefile new file mode 100644 index ..764552ab8d0a --- /dev/null +++ b/drivers/dma/sf-pdma/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_SF_PDMA) += sf-pdma.o diff --git a/drivers/dma/sf-pdma/sf-pdma.c b/drivers/dma/sf-pdma/sf-pdma.c new file mode 100644 index ..0ebf8a819811 --- /dev/null +++ b/drivers/dma/sf-pdma/sf-pdma.c @@ -0,0 +1,623 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/** + * SiFive FU540 Platform DMA driver + * Copyright (C) 2019 SiFive + * + * Based partially on: + * - drivers/dma/fsl-edma.c + * - drivers/dma/dw-edma/ + * - drivers/dma/pxa-dma.c + * + * See the following sources for further documentation: + * - Chapter 12 "Platform DMA Engine (PDMA)" of + * SiFive FU540-C000 v1.0 + * https://static.dev.sifive.com/FU540-C000-v1.0.pdf + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sf-pdma.h" +#include "../dmaengine.h" +#include "../virt-dma.h" + +#define SIFIVE_PDMA_NAME "sf-pdma" + +#ifndef readq +static inline unsigned long long readq(void __iomem *addr) +{ + return readl(addr) | (((unsigned long long)readl(addr + 4)) << 32LL); +} +#endif + +#ifndef writeq +static inline void writeq(unsigned long long v, void __iomem *addr) +{ + writel(v & 0x, addr); + writel(v >> 32, addr + 4); +} +
Re: [PATCH] Bluetooth: btusb: avoid unused function warning
Hi Arnd, > The btusb_rtl_cmd_timeout() function is used inside of an > ifdef, leading to a warning when this part is hidden > from the compiler: > > drivers/bluetooth/btusb.c:530:13: error: unused function > 'btusb_rtl_cmd_timeout' [-Werror,-Wunused-function] > > Use an IS_ENABLED() check instead so the compiler can see > the code and then discard it silently. > > Fixes: d7ef0d1e3968 ("Bluetooth: btusb: Use cmd_timeout to reset Realtek > device") > Signed-off-by: Arnd Bergmann > --- > drivers/bluetooth/btusb.c | 5 ++--- > 1 file changed, 2 insertions(+), 3 deletions(-) > > diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c > index a9c35ebb30f8..23e606aaaea4 100644 > --- a/drivers/bluetooth/btusb.c > +++ b/drivers/bluetooth/btusb.c > @@ -3807,8 +3807,8 @@ static int btusb_probe(struct usb_interface *intf, > btusb_check_needs_reset_resume(intf); > } > > -#ifdef CONFIG_BT_HCIBTUSB_RTL > - if (id->driver_info & BTUSB_REALTEK) { > + if (IS_ENABLED(CONFIG_BT_HCIBTUSB_RTL) && > + (id->driver_info & BTUSB_REALTEK)) { > hdev->setup = btrtl_setup_realtek; > hdev->shutdown = btrtl_shutdown_realtek; > hdev->cmd_timeout = btusb_rtl_cmd_timeout; > @@ -3819,7 +3819,6 @@ static int btusb_probe(struct usb_interface *intf, >*/ > set_bit(BTUSB_WAKEUP_DISABLE, &data->flags); > } > -#endif I prefer that we stick another ifdef around the btusb_rtl_cmd_timeout function since that is how we did it for the other vendors as well. However I start to wonder if we need all these vendor ifdef anyway. The vendor specific functions should turn into empty stubs if their support is not selected. Regards Marcel
[PATCH v3 07/10] sched/fair: evenly spread tasks when not overloaded
When there is only 1 CPU per group, using the idle CPUs to evenly spread tasks doesn't make sense and nr_running is a better metric. Signed-off-by: Vincent Guittot --- kernel/sched/fair.c | 40 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 15ec38c..a7c8ee6 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8596,18 +8596,34 @@ static struct sched_group *find_busiest_group(struct lb_env *env) busiest->sum_nr_running > local->sum_nr_running + 1) goto force_balance; - if (busiest->group_type != group_overloaded && -(env->idle == CPU_NOT_IDLE || - local->idle_cpus <= (busiest->idle_cpus + 1))) - /* -* If the busiest group is not overloaded -* and there is no imbalance between this and busiest group -* wrt idle CPUs, it is balanced. The imbalance -* becomes significant if the diff is greater than 1 otherwise -* we might end up to just move the imbalance on another -* group. -*/ - goto out_balanced; + if (busiest->group_type != group_overloaded) { + if (env->idle == CPU_NOT_IDLE) + /* +* If the busiest group is not overloaded (and as a +* result the local one too) but this cpu is already +* busy, let another idle cpu try to pull task. +*/ + goto out_balanced; + + if (busiest->group_weight > 1 && + local->idle_cpus <= (busiest->idle_cpus + 1)) + /* +* If the busiest group is not overloaded +* and there is no imbalance between this and busiest +* group wrt idle CPUs, it is balanced. The imbalance +* becomes significant if the diff is greater than 1 +* otherwise we might end up to just move the imbalance +* on another group. Of course this applies only if +* there is more than 1 CPU per group. +*/ + goto out_balanced; + + if (busiest->sum_h_nr_running == 1) + /* +* busiest doesn't have any tasks waiting to run +*/ + goto out_balanced; + } force_balance: /* Looks like there is an imbalance. Compute it */ -- 2.7.4
[PATCH v3 03/10] sched/fair: remove meaningless imbalance calculation
clean up load_balance and remove meaningless calculation and fields before adding new algorithm. Signed-off-by: Vincent Guittot --- kernel/sched/fair.c | 105 +--- 1 file changed, 1 insertion(+), 104 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 02ab6b5..017aad0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5390,18 +5390,6 @@ static unsigned long capacity_of(int cpu) return cpu_rq(cpu)->cpu_capacity; } -static unsigned long cpu_avg_load_per_task(int cpu) -{ - struct rq *rq = cpu_rq(cpu); - unsigned long nr_running = READ_ONCE(rq->cfs.h_nr_running); - unsigned long load_avg = cpu_runnable_load(rq); - - if (nr_running) - return load_avg / nr_running; - - return 0; -} - static void record_wakee(struct task_struct *p) { /* @@ -7677,7 +7665,6 @@ static unsigned long task_h_load(struct task_struct *p) struct sg_lb_stats { unsigned long avg_load; /*Avg load across the CPUs of the group */ unsigned long group_load; /* Total load over the CPUs of the group */ - unsigned long load_per_task; unsigned long group_capacity; unsigned long group_util; /* Total utilization of the group */ unsigned int sum_h_nr_running; /* Nr of CFS tasks running in the group */ @@ -8059,9 +8046,6 @@ static inline void update_sg_lb_stats(struct lb_env *env, sgs->group_capacity = group->sgc->capacity; sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity; - if (sgs->sum_h_nr_running) - sgs->load_per_task = sgs->group_load / sgs->sum_h_nr_running; - sgs->group_weight = group->group_weight; sgs->group_no_capacity = group_is_overloaded(env, sgs); @@ -8292,76 +8276,6 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd } /** - * fix_small_imbalance - Calculate the minor imbalance that exists - * amongst the groups of a sched_domain, during - * load balancing. - * @env: The load balancing environment. - * @sds: Statistics of the sched_domain whose imbalance is to be calculated. 
- */ -static inline -void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) -{ - unsigned long tmp, capa_now = 0, capa_move = 0; - unsigned int imbn = 2; - unsigned long scaled_busy_load_per_task; - struct sg_lb_stats *local, *busiest; - - local = &sds->local_stat; - busiest = &sds->busiest_stat; - - if (!local->sum_h_nr_running) - local->load_per_task = cpu_avg_load_per_task(env->dst_cpu); - else if (busiest->load_per_task > local->load_per_task) - imbn = 1; - - scaled_busy_load_per_task = - (busiest->load_per_task * SCHED_CAPACITY_SCALE) / - busiest->group_capacity; - - if (busiest->avg_load + scaled_busy_load_per_task >= - local->avg_load + (scaled_busy_load_per_task * imbn)) { - env->imbalance = busiest->load_per_task; - return; - } - - /* -* OK, we don't have enough imbalance to justify moving tasks, -* however we may be able to increase total CPU capacity used by -* moving them. -*/ - - capa_now += busiest->group_capacity * - min(busiest->load_per_task, busiest->avg_load); - capa_now += local->group_capacity * - min(local->load_per_task, local->avg_load); - capa_now /= SCHED_CAPACITY_SCALE; - - /* Amount of load we'd subtract */ - if (busiest->avg_load > scaled_busy_load_per_task) { - capa_move += busiest->group_capacity * - min(busiest->load_per_task, - busiest->avg_load - scaled_busy_load_per_task); - } - - /* Amount of load we'd add */ - if (busiest->avg_load * busiest->group_capacity < - busiest->load_per_task * SCHED_CAPACITY_SCALE) { - tmp = (busiest->avg_load * busiest->group_capacity) / - local->group_capacity; - } else { - tmp = (busiest->load_per_task * SCHED_CAPACITY_SCALE) / - local->group_capacity; - } - capa_move += local->group_capacity * - min(local->load_per_task, local->avg_load + tmp); - capa_move /= SCHED_CAPACITY_SCALE; - - /* Move if we gain throughput */ - if (capa_move > capa_now) - env->imbalance = busiest->load_per_task; -} - -/** * calculate_imbalance - Calculate the amount of imbalance present within the * groups of 
a given sched_domain during load balance. * @env: load balance environment @@ -8380,15 +8294,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s return; } - if (bus
[PATCH v3 04/10] sched/fair: rework load_balance
The load_balance algorithm contains some heuristics which have become meaningless since the rework of the scheduler's metrics like the introduction of PELT. Furthermore, load is an ill-suited metric for solving certain task placement imbalance scenarios. For instance, in the presence of idle CPUs, we should simply try to get at least one task per CPU, whereas the current load-based algorithm can actually leave idle CPUs alone simply because the load is somewhat balanced. The current algorithm ends up creating virtual and meaningless values like the avg_load_per_task or tweaks the state of a group to make it overloaded whereas it's not, in order to try to migrate tasks. load_balance should better qualify the imbalance of the group and clearly define what has to be moved to fix this imbalance. The type of sched_group has been extended to better reflect the type of imbalance. We now have : group_has_spare group_fully_busy group_misfit_task group_asym_packing group_imbalanced group_overloaded Based on the type of sched_group, load_balance now sets what it wants to move in order to fix the imbalance. It can be some load as before but also some utilization, a number of tasks or a type of task: migrate_task migrate_util migrate_load migrate_misfit This new load_balance algorithm fixes several pending wrong task placements: - the 1 task per CPU case with asymmetric systems - the case of a CFS task preempted by other classes - the case of tasks not evenly spread on groups with spare capacity Also the load balance decisions have been consolidated in the 3 functions below after removing the few bypasses and hacks of the current code: - update_sd_pick_busiest() selects the busiest sched_group. - find_busiest_group() checks if there is an imbalance between local and busiest group. - calculate_imbalance() decides what has to be moved. Finally, the now unused field total_running of struct sd_lb_stats has been removed. 
Signed-off-by: Vincent Guittot --- kernel/sched/fair.c | 585 ++-- 1 file changed, 380 insertions(+), 205 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 017aad0..d33379c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7078,11 +7078,26 @@ static unsigned long __read_mostly max_load_balance_interval = HZ/10; enum fbq_type { regular, remote, all }; +/* + * group_type describes the group of CPUs at the moment of the load balance. + * The enum is ordered by pulling priority, with the group with lowest priority + * first so the groupe_type can be simply compared when selecting the busiest + * group. see update_sd_pick_busiest(). + */ enum group_type { - group_other = 0, + group_has_spare = 0, + group_fully_busy, group_misfit_task, + group_asym_packing, group_imbalanced, - group_overloaded, + group_overloaded +}; + +enum migration_type { + migrate_load = 0, + migrate_util, + migrate_task, + migrate_misfit }; #define LBF_ALL_PINNED 0x01 @@ -7115,7 +7130,7 @@ struct lb_env { unsigned intloop_max; enum fbq_type fbq_type; - enum group_type src_grp_type; + enum migration_type balance_type; struct list_headtasks; }; @@ -7347,7 +7362,7 @@ static int detach_tasks(struct lb_env *env) { struct list_head *tasks = &env->src_rq->cfs_tasks; struct task_struct *p; - unsigned long load; + unsigned long util, load; int detached = 0; lockdep_assert_held(&env->src_rq->lock); @@ -7380,19 +7395,53 @@ static int detach_tasks(struct lb_env *env) if (!can_migrate_task(p, env)) goto next; - load = task_h_load(p); + switch (env->balance_type) { + case migrate_load: + load = task_h_load(p); - if (sched_feat(LB_MIN) && load < 16 && !env->sd->nr_balance_failed) - goto next; + if (sched_feat(LB_MIN) && + load < 16 && !env->sd->nr_balance_failed) + goto next; - if ((load / 2) > env->imbalance) - goto next; + if ((load / 2) > env->imbalance) + goto next; + + env->imbalance -= load; + break; + + case migrate_util: + util = task_util_est(p); + + if (util > 
env->imbalance) + goto next; + + env->imbalance -= util; + break; + + case migrate_task: + /* Migrate task */ + env->imbalance--; + break; + +
[PATCH v3 06/10] sched/fair: use load instead of runnable load in load_balance
runnable load has been introduced to take into account the case where blocked load biases the load balance decision which was selecting underutilized group with huge blocked load whereas other groups were overloaded. The load is now only used when groups are overloaded. In this case, it's worth being conservative and taking into account the sleeping tasks that might wakeup on the cpu. Signed-off-by: Vincent Guittot --- kernel/sched/fair.c | 15 ++- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7e74836..15ec38c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5385,6 +5385,11 @@ static unsigned long cpu_runnable_load(struct rq *rq) return cfs_rq_runnable_load_avg(&rq->cfs); } +static unsigned long cpu_load(struct rq *rq) +{ + return cfs_rq_load_avg(&rq->cfs); +} + static unsigned long capacity_of(int cpu) { return cpu_rq(cpu)->cpu_capacity; @@ -8070,7 +8075,7 @@ static inline void update_sg_lb_stats(struct lb_env *env, if ((env->flags & LBF_NOHZ_STATS) && update_nohz_stats(rq, false)) env->flags |= LBF_NOHZ_AGAIN; - sgs->group_load += cpu_runnable_load(rq); + sgs->group_load += cpu_load(rq); sgs->group_util += cpu_util(i); sgs->sum_h_nr_running += rq->cfs.h_nr_running; @@ -8512,7 +8517,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env) init_sd_lb_stats(&sds); /* -* Compute the various statistics relavent for load balancing at +* Compute the various statistics relevant for load balancing at * this level. */ update_sd_lb_stats(env, &sds); @@ -8672,10 +8677,10 @@ static struct rq *find_busiest_queue(struct lb_env *env, switch (env->balance_type) { case migrate_load: /* -* When comparing with load imbalance, use cpu_runnable_load() +* When comparing with load imbalance, use cpu_load() * which is not scaled with the CPU capacity. 
*/ - load = cpu_runnable_load(rq); + load = cpu_load(rq); if (nr_running == 1 && load > env->imbalance && !check_cpu_capacity(rq, env->sd)) @@ -8683,7 +8688,7 @@ static struct rq *find_busiest_queue(struct lb_env *env, /* * For the load comparisons with the other CPU's, consider -* the cpu_runnable_load() scaled with the CPU capacity, so +* the cpu_load() scaled with the CPU capacity, so * that the load can be moved away from the CPU that is * potentially running at a lower capacity. * -- 2.7.4
[PATCH v3 0/8] sched/fair: rework the CFS load balance
Several wrong task placements have been raised with the current load balance algorithm but their fixes are not always straightforward and end up using biased values to force migrations. A cleanup and rework of the load balance will help to handle such use cases and make it possible to fine-tune the behavior of the scheduler for other cases. Patch 1 has already been sent separately and only consolidates the asym policy in one place and helps the review of the changes in load_balance. Patch 2 renames the sum of h_nr_running in stats. Patch 3 removes meaningless imbalance computation to make review of patch 4 easier. Patch 4 reworks the load_balance algorithm and fixes some wrong task placement but tries to stay conservative. Patch 5 adds the sum of nr_running to monitor non-CFS tasks and takes that into account when pulling tasks. Patch 6 replaces runnable_load by load now that the signal is only used when overloaded. Patch 7 improves the spread of tasks at the 1st scheduling level. Patch 8 uses utilization instead of load in all steps of the misfit task path. Patch 9 replaces runnable_load_avg by load_avg in the wake up path. Patch 10 optimizes find_idlest_group() that was using both runnable_load and load. This has not been squashed with the previous patch to ease the review. 
Some benchmark results based on 8 iterations of each test: - small arm64 dual quad cores system tip/sched/core w/ this patchset improvement schedpipe 54981 +/-0.36% 55459 +/-0.31% (+0.97%) hackbench 1 groups 0.906 +/-2.34% 0.906 +/-2.88% (+0.06%) - large arm64 2 nodes / 224 cores system tip/sched/core w/ this patchset improvement schedpipe 125323 +/-0.98% 125624 +/-0.71% (+0.24%) hackbench -l (256000/#grp) -g #grp 1 groups 15.360 +/-1.76% 14.206 +/-1.40% (+8.69%) 4 groups 5.822 +/-1.02% 5.508 +/-6.45% (+5.38%) 16 groups 3.103 +/-0.80% 3.244 +/-0.77% (-4.52%) 32 groups 2.892 +/-1.23% 2.850 +/-1.81% (+1.47%) 64 groups 2.825 +/-1.51% 2.725 +/-1.51% (+3.54%) 128 groups 3.149 +/-8.46% 3.053 +/-13.15% (+3.06%) 256 groups 3.511 +/-8.49% 3.019 +/-1.71% (+14.03%) dbench 1 groups 329.677 +/-0.46% 329.771 +/-0.11% (+0.03%) 4 groups 931.499 +/-0.79% 947.118 +/-0.94% (+1.68%) 16 groups 1924.210 +/-0.89% 1947.849 +/-0.76% (+1.23%) 32 groups 2350.646 +/-5.75% 2351.549 +/-6.33% (+0.04%) 64 groups 2201.524 +/-3.35% 2192.749 +/-5.84% (-0.40%) 128 groups 2206.858 +/-2.50% 2376.265 +/-7.44% (+7.68%) 256 groups 1263.520 +/-3.34% 1633.143 +/-13.02% (+29.25%) tip/sched/core sha1: 0413d7f33e60 ('sched/uclamp: Always use 'enum uclamp_id' for clamp_id values') Changes since v2: - fix typo and reorder code - some minor code fixes - optimize find_idlest_group() Not covered in this patchset: - update find_idlest_group() to be more aligned with load_balance(). I didn't want to delay this version because of this update which is not ready yet - Better detection of overloaded and fully busy state, especially for cases when nr_running > nr CPUs. 
Vincent Guittot (8): sched/fair: clean up asym packing sched/fair: rename sum_nr_running to sum_h_nr_running sched/fair: remove meaningless imbalance calculation sched/fair: rework load_balance sched/fair: use rq->nr_running when balancing load sched/fair: use load instead of runnable load in load_balance sched/fair: evenly spread tasks when not overloaded sched/fair: use utilization to select misfit task sched/fair: use load instead of runnable load in wakeup path sched/fair: optimize find_idlest_group kernel/sched/fair.c | 805 +++- 1 file changed, 417 insertions(+), 388 deletions(-) -- 2.7.4
[PATCH v3 08/10] sched/fair: use utilization to select misfit task
Utilization is used to detect a misfit task but the load is then used to select the task on the CPU, which can lead to selecting a small task with high weight instead of the task that triggered the misfit migration. Signed-off-by: Vincent Guittot Acked-by: Valentin Schneider --- kernel/sched/fair.c | 10 ++ 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index a7c8ee6..acca869 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7429,14 +7429,8 @@ static int detach_tasks(struct lb_env *env) break; case migrate_misfit: - load = task_h_load(p); - - /* -* utilization of misfit task might decrease a bit -* since it has been recorded. Be conservative in the -* condition. -*/ - if (load < env->imbalance) + /* This is not a misfit task */ + if (task_fits_capacity(p, capacity_of(env->src_cpu))) goto next; env->imbalance = 0; -- 2.7.4
[PATCH v3 01/10] sched/fair: clean up asym packing
Clean up asym packing to follow the default load balance behavior: - classify the group by creating a group_asym_packing field. - calculate the imbalance in calculate_imbalance() instead of bypassing it. We don't need to test twice same conditions anymore to detect asym packing and we consolidate the calculation of imbalance in calculate_imbalance(). There is no functional changes. Signed-off-by: Vincent Guittot --- kernel/sched/fair.c | 63 ++--- 1 file changed, 16 insertions(+), 47 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1054d2c..3175fea 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7685,6 +7685,7 @@ struct sg_lb_stats { unsigned int group_weight; enum group_type group_type; int group_no_capacity; + unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */ unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */ #ifdef CONFIG_NUMA_BALANCING unsigned int nr_numa_running; @@ -8139,9 +8140,17 @@ static bool update_sd_pick_busiest(struct lb_env *env, * ASYM_PACKING needs to move all the work to the highest * prority CPUs in the group, therefore mark all groups * of lower priority than ourself as busy. +* +* This is primarily intended to used at the sibling level. Some +* cores like POWER7 prefer to use lower numbered SMT threads. In the +* case of POWER7, it can move to lower SMT modes only when higher +* threads are idle. When in lower SMT modes, the threads will +* perform better since they share less core resources. Hence when we +* have idle threads, we want them to be the higher ones. */ if (sgs->sum_nr_running && sched_asym_prefer(env->dst_cpu, sg->asym_prefer_cpu)) { + sgs->group_asym_packing = 1; if (!sds->busiest) return true; @@ -8283,51 +8292,6 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd } /** - * check_asym_packing - Check to see if the group is packed into the - * sched domain. 
- * - * This is primarily intended to used at the sibling level. Some - * cores like POWER7 prefer to use lower numbered SMT threads. In the - * case of POWER7, it can move to lower SMT modes only when higher - * threads are idle. When in lower SMT modes, the threads will - * perform better since they share less core resources. Hence when we - * have idle threads, we want them to be the higher ones. - * - * This packing function is run on idle threads. It checks to see if - * the busiest CPU in this domain (core in the P7 case) has a higher - * CPU number than the packing function is being run on. Here we are - * assuming lower CPU number will be equivalent to lower a SMT thread - * number. - * - * Return: 1 when packing is required and a task should be moved to - * this CPU. The amount of the imbalance is returned in env->imbalance. - * - * @env: The load balancing environment. - * @sds: Statistics of the sched_domain which is to be packed - */ -static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds) -{ - int busiest_cpu; - - if (!(env->sd->flags & SD_ASYM_PACKING)) - return 0; - - if (env->idle == CPU_NOT_IDLE) - return 0; - - if (!sds->busiest) - return 0; - - busiest_cpu = sds->busiest->asym_prefer_cpu; - if (sched_asym_prefer(busiest_cpu, env->dst_cpu)) - return 0; - - env->imbalance = sds->busiest_stat.group_load; - - return 1; -} - -/** * fix_small_imbalance - Calculate the minor imbalance that exists * amongst the groups of a sched_domain, during * load balancing. 
@@ -8411,6 +8375,11 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s local = &sds->local_stat; busiest = &sds->busiest_stat; + if (busiest->group_asym_packing) { + env->imbalance = busiest->group_load; + return; + } + if (busiest->group_type == group_imbalanced) { /* * In the group_imb case we cannot rely on group-wide averages @@ -8515,8 +8484,8 @@ static struct sched_group *find_busiest_group(struct lb_env *env) busiest = &sds.busiest_stat; /* ASYM feature bypasses nice load balance check */ - if (check_asym_packing(env, &sds)) - return sds.busiest; + if (busiest->group_asym_packing) + goto force_balance; /* There is no busy sibling group to pull tasks from */ if (!sds.busiest || busiest->sum_nr_running == 0) -- 2.7.4
[PATCH v3 02/10] sched/fair: rename sum_nr_running to sum_h_nr_running
Rename sum_nr_running to sum_h_nr_running because it effectively tracks cfs->h_nr_running so we can use sum_nr_running to track rq->nr_running when needed. There is no functional changes. Signed-off-by: Vincent Guittot --- kernel/sched/fair.c | 32 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3175fea..02ab6b5 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7680,7 +7680,7 @@ struct sg_lb_stats { unsigned long load_per_task; unsigned long group_capacity; unsigned long group_util; /* Total utilization of the group */ - unsigned int sum_nr_running; /* Nr tasks running in the group */ + unsigned int sum_h_nr_running; /* Nr of CFS tasks running in the group */ unsigned int idle_cpus; unsigned int group_weight; enum group_type group_type; @@ -7725,7 +7725,7 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds) .total_capacity = 0UL, .busiest_stat = { .avg_load = 0UL, - .sum_nr_running = 0, + .sum_h_nr_running = 0, .group_type = group_other, }, }; @@ -7916,7 +7916,7 @@ static inline int sg_imbalanced(struct sched_group *group) static inline bool group_has_capacity(struct lb_env *env, struct sg_lb_stats *sgs) { - if (sgs->sum_nr_running < sgs->group_weight) + if (sgs->sum_h_nr_running < sgs->group_weight) return true; if ((sgs->group_capacity * 100) > @@ -7937,7 +7937,7 @@ group_has_capacity(struct lb_env *env, struct sg_lb_stats *sgs) static inline bool group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs) { - if (sgs->sum_nr_running <= sgs->group_weight) + if (sgs->sum_h_nr_running <= sgs->group_weight) return false; if ((sgs->group_capacity * 100) < @@ -8029,7 +8029,7 @@ static inline void update_sg_lb_stats(struct lb_env *env, sgs->group_load += cpu_runnable_load(rq); sgs->group_util += cpu_util(i); - sgs->sum_nr_running += rq->cfs.h_nr_running; + sgs->sum_h_nr_running += rq->cfs.h_nr_running; nr_running = rq->nr_running; if (nr_running > 1) @@ -8059,8 +8059,8 @@ static 
inline void update_sg_lb_stats(struct lb_env *env, sgs->group_capacity = group->sgc->capacity; sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity; - if (sgs->sum_nr_running) - sgs->load_per_task = sgs->group_load / sgs->sum_nr_running; + if (sgs->sum_h_nr_running) + sgs->load_per_task = sgs->group_load / sgs->sum_h_nr_running; sgs->group_weight = group->group_weight; @@ -8117,7 +8117,7 @@ static bool update_sd_pick_busiest(struct lb_env *env, * capable CPUs may harm throughput. Maximize throughput, * power/energy consequences are not considered. */ - if (sgs->sum_nr_running <= sgs->group_weight && + if (sgs->sum_h_nr_running <= sgs->group_weight && group_smaller_min_cpu_capacity(sds->local, sg)) return false; @@ -8148,7 +8148,7 @@ static bool update_sd_pick_busiest(struct lb_env *env, * perform better since they share less core resources. Hence when we * have idle threads, we want them to be the higher ones. */ - if (sgs->sum_nr_running && + if (sgs->sum_h_nr_running && sched_asym_prefer(env->dst_cpu, sg->asym_prefer_cpu)) { sgs->group_asym_packing = 1; if (!sds->busiest) @@ -8166,9 +8166,9 @@ static bool update_sd_pick_busiest(struct lb_env *env, #ifdef CONFIG_NUMA_BALANCING static inline enum fbq_type fbq_classify_group(struct sg_lb_stats *sgs) { - if (sgs->sum_nr_running > sgs->nr_numa_running) + if (sgs->sum_h_nr_running > sgs->nr_numa_running) return regular; - if (sgs->sum_nr_running > sgs->nr_preferred_running) + if (sgs->sum_h_nr_running > sgs->nr_preferred_running) return remote; return all; } @@ -8243,7 +8243,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd */ if (prefer_sibling && sds->local && group_has_capacity(env, local) && - (sgs->sum_nr_running > local->sum_nr_running + 1)) { + (sgs->sum_h_nr_running > local->sum_h_nr_running + 1)) { sgs->group_no_capacity = 1; sgs->group_type = group_classify(sg, sgs); } @@ -8255,7 +8255,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, 
struct sd_lb_stats *sd next_group: /* Now, start updating sd_lb_stats */ - sds->total_running +
[PATCH v3 10/10] sched/fair: optimize find_idlest_group
find_idlest_group() now loads CPU's load_avg in 2 different ways. Consolidate the function to read and use load_avg only once and simplify the algorithm to only look for the group with lowest load_avg. Signed-off-by: Vincent Guittot --- kernel/sched/fair.c | 52 +++- 1 file changed, 11 insertions(+), 41 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 39a37ae..1fac444 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5560,16 +5560,14 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, { struct sched_group *idlest = NULL, *group = sd->groups; struct sched_group *most_spare_sg = NULL; - unsigned long min_runnable_load = ULONG_MAX; - unsigned long this_runnable_load = ULONG_MAX; - unsigned long min_avg_load = ULONG_MAX, this_avg_load = ULONG_MAX; + unsigned long min_load = ULONG_MAX, this_load = ULONG_MAX; unsigned long most_spare = 0, this_spare = 0; int imbalance_scale = 100 + (sd->imbalance_pct-100)/2; unsigned long imbalance = scale_load_down(NICE_0_LOAD) * (sd->imbalance_pct-100) / 100; do { - unsigned long load, avg_load, runnable_load; + unsigned long load; unsigned long spare_cap, max_spare_cap; int local_group; int i; @@ -5586,15 +5584,11 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, * Tally up the load of all CPUs in the group and find * the group containing the CPU with most spare capacity. 
*/ - avg_load = 0; - runnable_load = 0; + load = 0; max_spare_cap = 0; for_each_cpu(i, sched_group_span(group)) { - load = cpu_load(cpu_rq(i)); - runnable_load += load; - - avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs); + load += cpu_load(cpu_rq(i)); spare_cap = capacity_spare_without(i, p); @@ -5603,31 +5597,15 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, } /* Adjust by relative CPU capacity of the group */ - avg_load = (avg_load * SCHED_CAPACITY_SCALE) / - group->sgc->capacity; - runnable_load = (runnable_load * SCHED_CAPACITY_SCALE) / + load = (load * SCHED_CAPACITY_SCALE) / group->sgc->capacity; if (local_group) { - this_runnable_load = runnable_load; - this_avg_load = avg_load; + this_load = load; this_spare = max_spare_cap; } else { - if (min_runnable_load > (runnable_load + imbalance)) { - /* -* The runnable load is significantly smaller -* so we can pick this new CPU: -*/ - min_runnable_load = runnable_load; - min_avg_load = avg_load; - idlest = group; - } else if ((runnable_load < (min_runnable_load + imbalance)) && - (100*min_avg_load > imbalance_scale*avg_load)) { - /* -* The runnable loads are close so take the -* blocked load into account through avg_load: -*/ - min_avg_load = avg_load; + if (load < min_load) { + min_load = load; idlest = group; } @@ -5668,18 +5646,10 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, * local domain to be very lightly loaded relative to the remote * domains but "imbalance" skews the comparison making remote CPUs * look much more favourable. When considering cross-domain, add -* imbalance to the runnable load on the remote node and consider -* staying local. +* imbalance to the load on the remote node and consider staying +* local. 
*/ - if ((sd->flags & SD_NUMA) && - min_runnable_load + imbalance >= this_runnable_load) - return NULL; - - if (min_runnable_load > (this_runnable_load + imbalance)) - return NULL; - - if ((this_runnable_load < (min_runnable_load + imbalance)) && -(100*this_avg_load < imbalance_scale*min_avg_load)) + if (min_load + imbalance >= this_load) return NULL; return idle
[PATCH v3 05/10] sched/fair: use rq->nr_running when balancing load
cfs load_balance only takes care of CFS tasks whereas CPUs can be used by other scheduling class. Typically, a CFS task preempted by a RT or deadline task will not get a chance to be pulled on another CPU because the load_balance doesn't take into account tasks from other classes. Add sum of nr_running in the statistics and use it to detect such situation. Signed-off-by: Vincent Guittot --- kernel/sched/fair.c | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d33379c..7e74836 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7716,6 +7716,7 @@ struct sg_lb_stats { unsigned long group_load; /* Total load over the CPUs of the group */ unsigned long group_capacity; unsigned long group_util; /* Total utilization of the group */ + unsigned int sum_nr_running; /* Nr of tasks running in the group */ unsigned int sum_h_nr_running; /* Nr of CFS tasks running in the group */ unsigned int idle_cpus; unsigned int group_weight; @@ -7949,7 +7950,7 @@ static inline int sg_imbalanced(struct sched_group *group) static inline bool group_has_capacity(struct lb_env *env, struct sg_lb_stats *sgs) { - if (sgs->sum_h_nr_running < sgs->group_weight) + if (sgs->sum_nr_running < sgs->group_weight) return true; if ((sgs->group_capacity * 100) > @@ -7970,7 +7971,7 @@ group_has_capacity(struct lb_env *env, struct sg_lb_stats *sgs) static inline bool group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs) { - if (sgs->sum_h_nr_running <= sgs->group_weight) + if (sgs->sum_nr_running <= sgs->group_weight) return false; if ((sgs->group_capacity * 100) < @@ -8074,6 +8075,8 @@ static inline void update_sg_lb_stats(struct lb_env *env, sgs->sum_h_nr_running += rq->cfs.h_nr_running; nr_running = rq->nr_running; + sgs->sum_nr_running += nr_running; + if (nr_running > 1) *sg_status |= SG_OVERLOAD; @@ -8423,7 +8426,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s * groups. 
*/ env->balance_type = migrate_task; - env->imbalance = (busiest->sum_h_nr_running - local->sum_h_nr_running) >> 1; + env->imbalance = (busiest->sum_nr_running - local->sum_nr_running) >> 1; return; } @@ -8585,7 +8588,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env) /* Try to move all excess tasks to child's sibling domain */ if (sds.prefer_sibling && local->group_type == group_has_spare && - busiest->sum_h_nr_running > local->sum_h_nr_running + 1) + busiest->sum_nr_running > local->sum_nr_running + 1) goto force_balance; if (busiest->group_type != group_overloaded && -- 2.7.4
[PATCH v3 09/10] sched/fair: use load instead of runnable load in wakeup path
runnable load has been introduced to take into account the case where blocked load biases the wake up path which may end up selecting an overloaded CPU with a large number of runnable tasks instead of an underutilized CPU with a huge blocked load. The wake up path now starts to look for idle CPUs before comparing runnable load and it's worth aligning the wake up path with the load_balance. Signed-off-by: Vincent Guittot --- kernel/sched/fair.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index acca869..39a37ae 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5485,7 +5485,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p, s64 this_eff_load, prev_eff_load; unsigned long task_load; - this_eff_load = cpu_runnable_load(cpu_rq(this_cpu)); + this_eff_load = cpu_load(cpu_rq(this_cpu)); if (sync) { unsigned long current_load = task_h_load(current); @@ -5503,7 +5503,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p, this_eff_load *= 100; this_eff_load *= capacity_of(prev_cpu); - prev_eff_load = cpu_runnable_load(cpu_rq(prev_cpu)); + prev_eff_load = cpu_load(cpu_rq(prev_cpu)); prev_eff_load -= task_load; if (sched_feat(WA_BIAS)) prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2; @@ -5591,7 +5591,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, max_spare_cap = 0; for_each_cpu(i, sched_group_span(group)) { - load = cpu_runnable_load(cpu_rq(i)); + load = cpu_load(cpu_rq(i)); runnable_load += load; avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs); @@ -5732,7 +5732,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this continue; } - load = cpu_runnable_load(cpu_rq(i)); + load = cpu_load(cpu_rq(i)); if (load < min_load) { min_load = load; least_loaded_cpu = i; -- 2.7.4
Re: [PATCH v7 1/7] nfc: pn533: i2c: "pn532" as dt compatible string
On Wed, Sep 18, 2019 at 02:34:57PM +0200, Simon Horman wrote: > On Tue, Sep 10, 2019 at 11:31:21AM +0200, Lars Poeschel wrote: > > It is favourable to have one unified compatible string for devices that > > have multiple interfaces. So this adds simply "pn532" as the devicetree > > binding compatible string and makes a note that the old ones are > > deprecated. > > Do you also need to update > Documentation/devicetree/bindings/net/nfc/pn533-i2c.txt > to both document the new compat string and deprecate the old ones? Simon, thank you for this hint. The patch 2/7 adds a separate binding doc, that contains the info about the deprecated compat strings. But I think this is not the way to go. I will change the patch 2/7 to update the info in Documentation/devicetree/bindings/net/nfc/pn533-i2c.txt instead, rename it to pn532.txt and do not add a new binding doc.
Re: [PATCH v2 3/3] dt-bindings: ddr: Add bindings for Samsung LPDDR3 memories
On 9/19/19 9:28 AM, Krzysztof Kozlowski wrote: > On Thu, 19 Sep 2019 at 08:49, Lukasz Luba wrote: >> >> Hi Krzysztof, >> >> On 9/18/19 8:51 PM, Krzysztof Kozlowski wrote: >>> On Mon, 16 Sep 2019 at 12:07, Lukasz Luba >>> wrote: Add compatible for Samsung k3qf2f20db LPDDR3 memory bindings. Introduce minor fixes in the old documentation. Signed-off-by: Lukasz Luba --- Documentation/devicetree/bindings/ddr/lpddr3.txt | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/ddr/lpddr3.txt b/Documentation/devicetree/bindings/ddr/lpddr3.txt index 3b2485b84b3f..49afe794daaa 100644 --- a/Documentation/devicetree/bindings/ddr/lpddr3.txt +++ b/Documentation/devicetree/bindings/ddr/lpddr3.txt @@ -1,7 +1,9 @@ * LPDDR3 SDRAM memories compliant to JEDEC JESD209-3C Required properties: -- compatible : Should be - "jedec,lpddr3" +- compatible : should be one of the following: + Generic default - "jedec,lpddr3". >>> >>> The convention is first compatible, then description. I gave you the >>> example to base on - at25. Why making it different? >> >> I have checked at25 that you pointed me to and also checked at24, which >> has a bit longer "compatible" section. >> >> I found that there are many "jedec,spi-nor" compatible devices, which I >> thought would be a better example for my "jedec,lpddr3". >> For example, two configurations, where you have a single labels or dual >> (with specific device) >> arch/arm/boot/dts/imx6dl-rex-basic.dts: >> compatible = "sst,sst25vf016b", "jedec,spi-nor"; >> arch/arm/boot/dts/imx6q-ba16.dtsi: >> compatible = "jedec,spi-nor"; >> >> The 'compatible' in documentation for the "jedec,spi-nor" is slightly >> different (similar to at24). >> Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt >> It has a long explanation, which is also OK. So I thought that it is >> quite flexible what you put in there. 
> > It is flexible but I see clear pattern in existing sources: >jedec,spi-nor.txt >compatible : May include a device-specific .. >... >Supported chip names: > at25df321a > ... > >at25.txt: >- compatible : Should be ",", and generic value "atmel,at25". > Example "," values: >"anvo,anv32e61w" >"microchip,25lc040" > > In these cases the doc says that "compatible should be" and then you > have the list of values. Your example says that the compatible should > be "Generic default" or "For Samsung 542x SoC"... :) The difference is > slight but putting the value first is a simple and elegant solution. > In your case one has to go to the end of sentence to find the most > important information - the compatible value. > >> I have also checked Cadance QSPI controller. >> Documentation/devicetree/bindings/mtd/cadence-quadspi.txt >> The controller might be built-in into different vendor SoC's >> and the "compatible" is ready to reflect it in similar fashion but >> with a short explanation in this section. > > I see. I do not find this pattern as much readable as jedec-spi-nor or > at25 therefore I mentioned them as an example to base on ("Exactly the > same as AT24 or AT25 EEPROM bindings."). We can avoid also this entire > discussion with YAML (which also follows approach of at25 - value > first). > >> Therefore, what you see in the patch draw heavily on Cadence's qspi, >> with a bit of inspiration from jedec,spi-nor usage. >> >> Should I change it to at25 "compatible" style and send next patch? > > Yes, please. Or go to YAML and make entire discussion obsolete. OK I will change it to at25 style. Regards, Lukasz
Re: [PATCH 4.19 16/50] gpiolib: acpi: Add gpiolib_acpi_run_edge_events_on_boot option and blacklist
On Wed 2019-09-18 08:18:59, Greg Kroah-Hartman wrote: > From: Hans de Goede > > commit 61f7f7c8f978b1c0d80e43c83b7d110ca0496eb4 upstream. > > Another day; another DSDT bug we need to workaround... > > Since commit ca876c7483b6 ("gpiolib-acpi: make sure we trigger edge events > at least once on boot") we call _AEI edge handlers at boot. > > In some rare cases this causes problems. One example of this is the Minix > Neo Z83-4 mini PC, this device has a clear DSDT bug where it has some copy > and pasted code for dealing with Micro USB-B connector host/device role > switching, while the mini PC does not even have a micro-USB connector. > This code, which should not be there, messes with the DDC data pin from > the HDMI connector (switching it to GPIO mode) breaking HDMI support. > > To avoid problems like this, this commit adds a new > gpiolib_acpi.run_edge_events_on_boot kernel commandline option, which > allows disabling the running of _AEI edge event handlers at boot. So... apparently Windows does _not_ run _AEI edge event handlers at boot, otherwise Minix would realize that fault. Would it make sense not to do it by default, either? Best regards, Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html signature.asc Description: Digital signature
Re: [PATCH] [RFC] vmscan.c: add a sysctl entry for controlling memory reclaim IO congestion_wait length
On 9/19/19 11:49, Matthew Wilcox wrote: On Thu, Sep 19, 2019 at 10:33:10AM +0800, Lin Feng wrote: On 9/18/19 20:33, Michal Hocko wrote: I absolutely agree here. From you changelog it is also not clear what is the underlying problem. Both congestion_wait and wait_iff_congested should wake up early if the congestion is handled. Is this not the case? For now I don't know why, codes seem should work as you said, maybe I need to trace more of the internals. But weird thing is that once I set the people-disliked-tunable iowait drop down instantly, this is contradictory to the code design. Yes, this is quite strange. If setting a smaller timeout makes a difference, that indicates we're not waking up soon enough. I see two possibilities; one is that a wakeup is missing somewhere -- ie the conditions under which we call clear_wb_congested() are wrong. Or we need to wake up sooner. Umm. We have clear_wb_congested() called from exactly one spot -- clear_bdi_congested(). That is only called from: drivers/block/pktcdvd.c fs/ceph/addr.c fs/fuse/control.c fs/fuse/dev.c fs/nfs/write.c Jens, is something supposed to be calling clear_bdi_congested() in the block layer? blk_clear_congested() used to exist until October 29th last year. Or is something else supposed to be waking up tasks that are sleeping on congestion? IIUC it looks like after commit a1ce35fa49852db60fc6e268038530be533c5b15, besides those *.c places as you mentioned above, vmscan codes will always wait as long as 100ms and nobody wakes them up. here: 1964 while (unlikely(too_many_isolated(pgdat, file, sc))) { 1965 if (stalled) 1966 return 0; 1967 1968 /* wait a bit for the reclaimer. */ >1969 msleep(100); 1970 stalled = true; 1971 1972 /* We are about to die and free our memory. Return now. 
*/ 1973 if (fatal_signal_pending(current)) 1974 return SWAP_CLUSTER_MAX; 1975 } and here: 2784 /* 2785 * If kswapd scans pages marked marked for immediate 2786 * reclaim and under writeback (nr_immediate), it 2787 * implies that pages are cycling through the LRU 2788 * faster than they are written so also forcibly stall. 2789 */ 2790 if (sc->nr.immediate) >2791 congestion_wait(BLK_RW_ASYNC, HZ/10); 2792 } except here, codes where set_bdi_congested will clear_bdi_congested at proper time, exactly the source files you mentioned above, so it's OK. 2808 if (!sc->hibernation_mode && !current_is_kswapd() && 2809current_may_throttle() && pgdat_memcg_congested(pgdat, root)) 2810 wait_iff_congested(BLK_RW_ASYNC, HZ/10);
Re: [PATCH v2] pwm: atmel: Remove platform_device_id and use only dt bindings
On 18.09.2019 17:57, Kamel Bouhara wrote: > Since commit 26202873bb51 ("avr32: remove support for AVR32 > architecture") there is no more user of platform_device_id and we > should only use dt bindings > > Signed-off-by: Kamel Bouhara Acked-by: Claudiu Beznea > --- > Changelog: > v1->v2 > > - Remove whole function atmel_pwm_get_driver_data and call > of_device_get_match_data from atmel_pwm_probe > > drivers/pwm/Kconfig | 2 +- > drivers/pwm/pwm-atmel.c | 35 +++ > 2 files changed, 4 insertions(+), 33 deletions(-) > > diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig > index a7e57516959e..b51fb1a33aa2 100644 > --- a/drivers/pwm/Kconfig > +++ b/drivers/pwm/Kconfig > @@ -44,7 +44,7 @@ config PWM_AB8500 > > config PWM_ATMEL > tristate "Atmel PWM support" > - depends on ARCH_AT91 > + depends on ARCH_AT91 && OF > help > Generic PWM framework driver for Atmel SoC. > > diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c > index e5e1eaf372fa..f7cf0a86a37c 100644 > --- a/drivers/pwm/pwm-atmel.c > +++ b/drivers/pwm/pwm-atmel.c > @@ -318,19 +318,6 @@ static const struct atmel_pwm_data mchp_sam9x60_pwm_data > = { > }, > }; > > -static const struct platform_device_id atmel_pwm_devtypes[] = { > - { > - .name = "at91sam9rl-pwm", > - .driver_data = (kernel_ulong_t)&atmel_sam9rl_pwm_data, > - }, { > - .name = "sama5d3-pwm", > - .driver_data = (kernel_ulong_t)&atmel_sama5_pwm_data, > - }, { > - /* sentinel */ > - }, > -}; > -MODULE_DEVICE_TABLE(platform, atmel_pwm_devtypes); > - > static const struct of_device_id atmel_pwm_dt_ids[] = { > { > .compatible = "atmel,at91sam9rl-pwm", > @@ -350,19 +337,6 @@ static const struct of_device_id atmel_pwm_dt_ids[] = { > }; > MODULE_DEVICE_TABLE(of, atmel_pwm_dt_ids); > > -static inline const struct atmel_pwm_data * > -atmel_pwm_get_driver_data(struct platform_device *pdev) > -{ > - const struct platform_device_id *id; > - > - if (pdev->dev.of_node) > - return of_device_get_match_data(&pdev->dev); > - > - id = 
platform_get_device_id(pdev); > - > - return (struct atmel_pwm_data *)id->driver_data; > -} > - > static int atmel_pwm_probe(struct platform_device *pdev) > { > const struct atmel_pwm_data *data; > @@ -370,7 +344,7 @@ static int atmel_pwm_probe(struct platform_device *pdev) > struct resource *res; > int ret; > > - data = atmel_pwm_get_driver_data(pdev); > + data = of_device_get_match_data(&pdev->dev); > if (!data) > return -ENODEV; > > @@ -396,10 +370,8 @@ static int atmel_pwm_probe(struct platform_device *pdev) > atmel_pwm->chip.dev = &pdev->dev; > atmel_pwm->chip.ops = &atmel_pwm_ops; > > - if (pdev->dev.of_node) { > - atmel_pwm->chip.of_xlate = of_pwm_xlate_with_flags; > - atmel_pwm->chip.of_pwm_n_cells = 3; > - } > + atmel_pwm->chip.of_xlate = of_pwm_xlate_with_flags; > + atmel_pwm->chip.of_pwm_n_cells = 3; > > atmel_pwm->chip.base = -1; > atmel_pwm->chip.npwm = 4; > @@ -437,7 +409,6 @@ static struct platform_driver atmel_pwm_driver = { > .name = "atmel-pwm", > .of_match_table = of_match_ptr(atmel_pwm_dt_ids), > }, > - .id_table = atmel_pwm_devtypes, > .probe = atmel_pwm_probe, > .remove = atmel_pwm_remove, > }; >
[PATCH] platform/x86: pmc_atom: Add Siemens SIMATIC IPC2x7E to critclk_systems DMI table
The SIMATIC IPC227E and IPC277E use the PMC clock for on-board components and get stuck during boot if the clock is disabled. Therefore, add these devices to the critical systems list. The board revision does vary in some instances and hence use PRODUCT_NAME to allow the boards to boot with identical names. Tested on SIMATIC IPC227E and IPC277E. Fixes: 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL") CC: Jan Kiszka CC: Cedric Hombourger Signed-off-by: Srikanth Krishnakar --- drivers/platform/x86/pmc_atom.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c index 9aca5e7ce6d0..1e48c2ec684e 100644 --- a/drivers/platform/x86/pmc_atom.c +++ b/drivers/platform/x86/pmc_atom.c @@ -419,7 +419,14 @@ static const struct dmi_system_id critclk_systems[] = { .ident = "SIMATIC IPC227E", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "SIEMENS AG"), - DMI_MATCH(DMI_PRODUCT_VERSION, "6ES7647-8B"), + DMI_MATCH(DMI_PRODUCT_NAME, "SIMATIC IPC227E"), + }, + }, + { + .ident = "SIMATIC IPC277E", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SIEMENS AG"), + DMI_MATCH(DMI_PRODUCT_NAME, "SIMATIC IPC277E"), }, }, { /*sentinel*/ } -- 2.17.1
Re: [PATCH?] powerpc: Hard wire PT_SOFTE value to 1 in gpr_get() too
Hi Oleg, Thanks for the patch. Oleg Nesterov writes: > I don't have a ppc machine, this patch wasn't even compile tested, > could you please review? > > The commit a8a4b03ab95f ("powerpc: Hard wire PT_SOFTE value to 1 in > ptrace & signals") changed ptrace_get_reg(PT_SOFTE) to report 0x1, > but PTRACE_GETREGS still copies pt_regs->softe as is. Ugh, that certainly seems broken. I guess we forgot/didn't-know that there were two paths through ptrace to get the one register. > This is not consistent and this breaks > http://sourceware.org/systemtap/wiki/utrace/tests/user-regs-peekpoke That's a 404 for me? Is it this: https://sourceware.org/systemtap/wiki/utrace/tests/ That seems to point me to a CVS repo? Which then didn't build. But now I have that one test built, and you're right it fails with: $ ./user-regs-peekpoke mismatch at offset 0x138: poked 0 but peeked 1 > Reported-by: Jan Kratochvil > Signed-off-by: Oleg Nesterov > --- > arch/powerpc/kernel/ptrace.c | 25 + > 1 file changed, 25 insertions(+) > > diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c > index 8c92feb..9e9342c 100644 > --- a/arch/powerpc/kernel/ptrace.c > +++ b/arch/powerpc/kernel/ptrace.c > @@ -363,11 +363,36 @@ static int gpr_get(struct task_struct *target, const > struct user_regset *regset, > BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != >offsetof(struct pt_regs, msr) + sizeof(long)); > > +#ifdef CONFIG_PPC64 > + if (!ret) > + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, > + &target->thread.regs->orig_gpr3, > + offsetof(struct pt_regs, orig_gpr3), > + offsetof(struct pt_regs, softe)); > + > + if (!ret) { > + unsigned long softe = 0x1; > + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr, > + offsetof(struct pt_regs, softe), > + offsetof(struct pt_regs, softe) + > + sizeof(softe)); > + } > + > + BUILD_BUG_ON(offsetof(struct pt_regs, trap) != > + offsetof(struct pt_regs, softe) + sizeof(long)); > + > + if (!ret) > + ret = user_regset_copyout(&pos, 
&count, &kbuf, &ubuf, > + &target->thread.regs->trap, > + offsetof(struct pt_regs, trap), > + sizeof(struct user_pt_regs)); > +#else > if (!ret) > ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, > &target->thread.regs->orig_gpr3, > offsetof(struct pt_regs, orig_gpr3), > sizeof(struct user_pt_regs)); > +#endif > if (!ret) > ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, > sizeof(struct user_pt_regs), -1); It would be nice if we could isolate the special logic in once place, ie. ptrace_get_reg(). We could do it like below. I'm 50/50 though on whether it's worth it, or if we should just go with the big ifdef like in your patch. cheers diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 8c92febf5f44..55510f1a7ec1 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -334,6 +334,11 @@ int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data) return -EIO; } +#ifndef __powerpc64__ +/* Needed on 32-bit to make the SOFTE logic below work without ifdefs */ +#define PT_SOFTE PT_MQ +#endif + static int gpr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) @@ -367,6 +372,24 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset, ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.regs->orig_gpr3, offsetof(struct pt_regs, orig_gpr3), + PT_SOFTE * sizeof(long)); + + /* SOFTE is special on 64-bit, the logic is in ptrace_get_reg() */ + if (!ret) { + unsigned long val = 0; + ptrace_get_reg(target, PT_SOFTE, &val); + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &val, + PT_SOFTE * sizeof(long), + offsetof(struct pt_regs, trap)); + } + + BUILD_BUG_ON(offsetof(struct pt_regs, trap) != +(PT_SOFTE * sizeof(long)) + sizeof(long)); + + if (!ret) + ret = user_regset_copyout(&pos,
Re: [PATCH v7 20/21] RISC-V: Enable VIRTIO drivers in RV64 and RV32 defconfig
On Wed, 4 Sep 2019, Anup Patel wrote: > This patch enables more VIRTIO drivers (such as console, rpmsg, 9p, > rng, etc.) which are usable on KVM RISC-V Guest and Xvisor RISC-V > Guest. > > Signed-off-by: Anup Patel > Acked-by: Paolo Bonzini > Reviewed-by: Paolo Bonzini > Reviewed-by: Alexander Graf Thanks, queued for v5.4-rc. - Paul
[PATCH] scsi: qedf: Remove always false 'tmp_prio < 0' statement
Since tmp_prio is declared as u8, the following statement is always false. tmp_prio < 0 So remove 'always false' statement. Signed-off-by: Austin Kim --- drivers/scsi/qedf/qedf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 1659d35..59ca98f 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -596,7 +596,7 @@ static void qedf_dcbx_handler(void *dev, struct qed_dcbx_get *get, u32 mib_type) tmp_prio = get->operational.app_prio.fcoe; if (qedf_default_prio > -1) qedf->prio = qedf_default_prio; - else if (tmp_prio < 0 || tmp_prio > 7) { + else if (tmp_prio > 7) { QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "FIP/FCoE prio %d out of range, setting to %d.\n", tmp_prio, QEDF_DEFAULT_PRIO); -- 2.6.2
Re: [PATCH] platform/x86: pmc_atom: Add Siemens SIMATIC IPC2x7E to critclk_systems DMI table
On 19.09.19 09:45, Srikanth Krishnakar wrote: The SIMATIC IPC227E and IPC277E uses the PMC clock for on-board components and gets stuck during boot if the clock is disabled. Therefore, add this device to the critical systems list. The Board revision does vary in some instances and hence use PRODUCT_NAME to allow the boards to boot with identical names. Tested on SIMATIC IPC227E and IPC277E. Fixes: 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL") CC: Jan Kiszka CC: Cedric Hombourger Signed-off-by: Srikanth Krishnakar --- drivers/platform/x86/pmc_atom.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c index 9aca5e7ce6d0..1e48c2ec684e 100644 --- a/drivers/platform/x86/pmc_atom.c +++ b/drivers/platform/x86/pmc_atom.c @@ -419,7 +419,14 @@ static const struct dmi_system_id critclk_systems[] = { .ident = "SIMATIC IPC227E", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "SIEMENS AG"), - DMI_MATCH(DMI_PRODUCT_VERSION, "6ES7647-8B"), + DMI_MATCH(DMI_PRODUCT_NAME, "SIMATIC IPC227E"), NACK, this is breaking platform variant detection. + }, + }, + { + .ident = "SIMATIC IPC277E", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SIEMENS AG"), + DMI_MATCH(DMI_PRODUCT_NAME, "SIMATIC IPC277E"), Let us clarify internally if we need to match on the same substring length as for the 227E. Jan }, }, { /*sentinel*/ } -- Siemens AG, Corporate Technology, CT RDA IOT SES-DE Corporate Competence Center Embedded Linux
RE: [PATCH] stmmac: selftest: avoid large stack usage
From: Arnd Bergmann Date: Sep/18/2019, 20:54:34 (UTC+00:00) > + if (!cfg || !cfg->enable) { > value &= ~XGMAC_RSSE; > writel(value, ioaddr + XGMAC_RSS_CTRL); > return 0; > } > > for (i = 0; i < (sizeof(cfg->key) / sizeof(u32)); i++) { > - ret = dwxgmac2_rss_write_reg(ioaddr, true, i, *key++); > + if (cfg) > + ret = dwxgmac2_rss_write_reg(ioaddr, true, i, > cfg->key[i]); > + else > + ret = dwxgmac2_rss_write_reg(ioaddr, true, i, 0); > + > if (ret) > return ret; > } > > for (i = 0; i < ARRAY_SIZE(cfg->table); i++) { > - ret = dwxgmac2_rss_write_reg(ioaddr, false, i, cfg->table[i]); > + if (cfg) > + ret = dwxgmac2_rss_write_reg(ioaddr, false, i, > cfg->table[i]); > + else > + ret = dwxgmac2_rss_write_reg(ioaddr, false, i, 0); > + I don't get these "if (cfg)" checks. You already check earlier if cfg is NULL and return if so. I don't think you need these extra checks. Also, your subject line should be something like: "net: stmmac: selftests: ..." --- Thanks, Jose Miguel Abreu
Re: [PATCH v7 08/13] lib/vsprintf: Remove support for %pF and %pf in favour of %pS and %ps
On Wed, Sep 18, 2019 at 3:34 PM Sakari Ailus wrote: > > %pS and %ps are now the preferred conversion specifiers to print function > names. The functionality is equivalent; remove the old, deprecated %pF > and %pf support. > > Depends-on: commit 2d44d165e939 ("scsi: lpfc: Convert existing %pf users to > %ps") Where is this commit present? Not in the mainline as of today. > Signed-off-by: Sakari Ailus > Reviewed-by: Andy Shevchenko > --- > Documentation/core-api/printk-formats.rst | 10 -- > lib/vsprintf.c| 8 ++-- > scripts/checkpatch.pl | 1 - > 3 files changed, 2 insertions(+), 17 deletions(-) > > diff --git a/Documentation/core-api/printk-formats.rst > b/Documentation/core-api/printk-formats.rst > index c6224d039bcbe..922a29eb70e6c 100644 > --- a/Documentation/core-api/printk-formats.rst > +++ b/Documentation/core-api/printk-formats.rst > @@ -86,8 +86,6 @@ Symbols/Function Pointers > > %pS versatile_init+0x0/0x110 > %ps versatile_init > - %pF versatile_init+0x0/0x110 > - %pf versatile_init > %pSRversatile_init+0x9/0x110 > (with __builtin_extract_return_addr() translation) > %pB prev_fn_of_versatile_init+0x88/0x88 > @@ -97,14 +95,6 @@ The ``S`` and ``s`` specifiers are used for printing a > pointer in symbolic > format. They result in the symbol name with (S) or without (s) > offsets. If KALLSYMS are disabled then the symbol address is printed instead. > > -Note, that the ``F`` and ``f`` specifiers are identical to ``S`` (``s``) > -and thus deprecated. We have ``F`` and ``f`` because on ia64, ppc64 and > -parisc64 function pointers are indirect and, in fact, are function > -descriptors, which require additional dereferencing before we can lookup > -the symbol. As of now, ``S`` and ``s`` perform dereferencing on those > -platforms (when needed), so ``F`` and ``f`` exist for compatibility > -reasons only. > - > The ``B`` specifier results in the symbol name with offsets and should be > used when printing stack backtraces. 
The specifier takes into > consideration the effect of compiler optimisations which may occur > diff --git a/lib/vsprintf.c b/lib/vsprintf.c > index b0967cf17137d..b00b57f9f911f 100644 > --- a/lib/vsprintf.c > +++ b/lib/vsprintf.c > @@ -909,7 +909,7 @@ char *symbol_string(char *buf, char *end, void *ptr, > #ifdef CONFIG_KALLSYMS > if (*fmt == 'B') > sprint_backtrace(sym, value); > - else if (*fmt != 'f' && *fmt != 's') > + else if (*fmt != 's') > sprint_symbol(sym, value); > else > sprint_symbol_no_offset(sym, value); > @@ -2007,9 +2007,7 @@ static char *kobject_string(char *buf, char *end, void > *ptr, > * > * - 'S' For symbolic direct pointers (or function descriptors) with offset > * - 's' For symbolic direct pointers (or function descriptors) without > offset > - * - 'F' Same as 'S' > - * - 'f' Same as 's' > - * - '[FfSs]R' as above with __builtin_extract_return_addr() translation > + * - '[Ss]R' as above with __builtin_extract_return_addr() translation > * - 'B' For backtraced symbolic direct pointers with offset > * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref] > * - 'r' For raw struct resource, e.g., [mem 0x0-0x1f flags 0x201] > @@ -2112,8 +2110,6 @@ char *pointer(const char *fmt, char *buf, char *end, > void *ptr, > struct printf_spec spec) > { > switch (*fmt) { > - case 'F': > - case 'f': > case 'S': > case 's': > ptr = dereference_symbol_descriptor(ptr); > diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl > index 93a7edfe0f059..a60c241112cd4 100755 > --- a/scripts/checkpatch.pl > +++ b/scripts/checkpatch.pl > @@ -6012,7 +6012,6 @@ sub process { > my $ext_type = "Invalid"; > my $use = ""; > if ($bad_specifier =~ /p[Ff]/) { > - $ext_type = "Deprecated"; > $use = " - use %pS instead"; > $use =~ s/pS/ps/ if > ($bad_specifier =~ /pf/); > } > -- > 2.20.1 >
Re: [PATCH] z3fold: fix memory leak in kmem cache
On Wed, Sep 18, 2019 at 9:35 AM Vlastimil Babka wrote: > > On 9/17/19 5:53 PM, Vitaly Wool wrote: > > Currently there is a leak in init_z3fold_page() -- it allocates > > handles from kmem cache even for headless pages, but then they are > > never used and never freed, so eventually kmem cache may get > > exhausted. This patch provides a fix for that. > > > > Reported-by: Markus Linnala > > Signed-off-by: Vitaly Wool > > Can a Fixes: commit be pinpointed, and CC stable added? Fixes: 7c2b8baa61fe578 "mm/z3fold.c: add structure for buddy handles" Best regards, Vitaly > > --- > > mm/z3fold.c | 15 +-- > > 1 file changed, 9 insertions(+), 6 deletions(-) > > > > diff --git a/mm/z3fold.c b/mm/z3fold.c > > index 6397725b5ec6..7dffef2599c3 100644 > > --- a/mm/z3fold.c > > +++ b/mm/z3fold.c > > @@ -301,14 +301,11 @@ static void z3fold_unregister_migration(struct > > z3fold_pool *pool) > > } > > > > /* Initializes the z3fold header of a newly allocated z3fold page */ > > -static struct z3fold_header *init_z3fold_page(struct page *page, > > +static struct z3fold_header *init_z3fold_page(struct page *page, bool > > headless, > > struct z3fold_pool *pool, gfp_t gfp) > > { > > struct z3fold_header *zhdr = page_address(page); > > - struct z3fold_buddy_slots *slots = alloc_slots(pool, gfp); > > - > > - if (!slots) > > - return NULL; > > + struct z3fold_buddy_slots *slots; > > > > INIT_LIST_HEAD(&page->lru); > > clear_bit(PAGE_HEADLESS, &page->private); > > @@ -316,6 +313,12 @@ static struct z3fold_header *init_z3fold_page(struct > > page *page, > > clear_bit(NEEDS_COMPACTING, &page->private); > > clear_bit(PAGE_STALE, &page->private); > > clear_bit(PAGE_CLAIMED, &page->private); > > + if (headless) > > + return zhdr; > > + > > + slots = alloc_slots(pool, gfp); > > + if (!slots) > > + return NULL; > > > > spin_lock_init(&zhdr->page_lock); > > kref_init(&zhdr->refcount); > > @@ -962,7 +965,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, > > size_t size, gfp_t gfp, > > if 
(!page) > > return -ENOMEM; > > > > - zhdr = init_z3fold_page(page, pool, gfp); > > + zhdr = init_z3fold_page(page, bud == HEADLESS, pool, gfp); > > if (!zhdr) { > > __free_page(page); > > return -ENOMEM; > > >
Re: threads-max observe limits
On Wed 18-09-19 09:15:41, Michal Hocko wrote: > On Tue 17-09-19 12:26:18, Eric W. Biederman wrote: [...] > > b) Not being able to bump threads_max to the physical limit of > >the machine is very clearly a regression. > > ... exactly this part. The changelog of the respective patch doesn't > really explain why it is needed except for "it sounds like a good idea > to be consistent". Any take on this Heinrich? If there really is no strong reasoning for restricting user input then I will suggest reverting 16db3d3f1170 ("kernel/sysctl.c: threads-max observe limits") -- Michal Hocko SUSE Labs
Re: [PATCH v3 17/26] vfio_pci: Loop using PCI_STD_NUM_BARS
On Wed, Sep 18, 2019 at 05:31:33PM +0300, Denis Efremov wrote: > On 9/18/19 12:17 PM, Andrew Murray wrote: > > On Mon, Sep 16, 2019 at 11:41:49PM +0300, Denis Efremov wrote: > >> Refactor loops to use idiomatic C style and avoid the fencepost error > >> of using "i < PCI_STD_RESOURCE_END" when "i <= PCI_STD_RESOURCE_END" > >> is required, e.g., commit 2f686f1d9bee ("PCI: Correct PCI_STD_RESOURCE_END > >> usage"). > >> > >> To iterate through all possible BARs, loop conditions changed to the > >> *number* of BARs "i < PCI_STD_NUM_BARS", instead of the index of the last > >> valid BAR "i <= PCI_STD_RESOURCE_END". > >> > >> Cc: Cornelia Huck > >> Cc: Alex Williamson > >> Signed-off-by: Denis Efremov > >> --- > >> drivers/vfio/pci/vfio_pci.c | 11 ++ > >> drivers/vfio/pci/vfio_pci_config.c | 32 +++-- > >> drivers/vfio/pci/vfio_pci_private.h | 4 ++-- > >> 3 files changed, 26 insertions(+), 21 deletions(-) > >> > >> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c > >> index 703948c9fbe1..cb7d220d3246 100644 > >> --- a/drivers/vfio/pci/vfio_pci.c > >> +++ b/drivers/vfio/pci/vfio_pci.c > >> @@ -110,13 +110,15 @@ static inline bool vfio_pci_is_vga(struct pci_dev > >> *pdev) > >> static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev) > >> { > >>struct resource *res; > >> - int bar; > >> + int i; > >>struct vfio_pci_dummy_resource *dummy_res; > >> > >>INIT_LIST_HEAD(&vdev->dummy_resources_list); > >> > >> - for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) { > >> - res = vdev->pdev->resource + bar; > >> + for (i = 0; i < PCI_STD_NUM_BARS; i++) { > >> + int bar = i + PCI_STD_RESOURCES; > >> + > >> + res = &vdev->pdev->resource[bar]; > > > > Why can't we just drop PCI_STD_RESOURCES and replace it was 0. I understand > > the abstraction here, but we don't do it elsewhere across the kernel. Is > > this > > necessary? 
> > There was a discussion about this particular case: > https://lkml.org/lkml/2019/8/12/999 > > It was decided to save the original style for vfio drivers. OK no problem. Thanks, Andrew Murray > > > > > Thanks, > > > > Andrew Murray > > > >> > >>if (!IS_ENABLED(CONFIG_VFIO_PCI_MMAP)) > >>goto no_mmap; > >> @@ -399,7 +401,8 @@ static void vfio_pci_disable(struct vfio_pci_device > >> *vdev) > >> > >>vfio_config_free(vdev); > >> > >> - for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) { > >> + for (i = 0; i < PCI_STD_NUM_BARS; i++) { > >> + bar = i + PCI_STD_RESOURCES; > >>if (!vdev->barmap[bar]) > >>continue; > >>pci_iounmap(pdev, vdev->barmap[bar]); > >> diff --git a/drivers/vfio/pci/vfio_pci_config.c > >> b/drivers/vfio/pci/vfio_pci_config.c > >> index f0891bd8444c..90c0b80f8acf 100644 > >> --- a/drivers/vfio/pci/vfio_pci_config.c > >> +++ b/drivers/vfio/pci/vfio_pci_config.c > >> @@ -450,30 +450,32 @@ static void vfio_bar_fixup(struct vfio_pci_device > >> *vdev) > >> { > >>struct pci_dev *pdev = vdev->pdev; > >>int i; > >> - __le32 *bar; > >> + __le32 *vbar; > >>u64 mask; > >> > >> - bar = (__le32 *)&vdev->vconfig[PCI_BASE_ADDRESS_0]; > >> + vbar = (__le32 *)&vdev->vconfig[PCI_BASE_ADDRESS_0]; > >> > >> - for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++, bar++) { > >> - if (!pci_resource_start(pdev, i)) { > >> - *bar = 0; /* Unmapped by host = unimplemented to user */ > >> + for (i = 0; i < PCI_STD_NUM_BARS; i++, vbar++) { > >> + int bar = i + PCI_STD_RESOURCES; > >> + > >> + if (!pci_resource_start(pdev, bar)) { > >> + *vbar = 0; /* Unmapped by host = unimplemented to user > >> */ > >>continue; > >>} > >> > >> - mask = ~(pci_resource_len(pdev, i) - 1); > >> + mask = ~(pci_resource_len(pdev, bar) - 1); > >> > >> - *bar &= cpu_to_le32((u32)mask); > >> - *bar |= vfio_generate_bar_flags(pdev, i); > >> + *vbar &= cpu_to_le32((u32)mask); > >> + *vbar |= vfio_generate_bar_flags(pdev, bar); > >> > >> - if (*bar & 
cpu_to_le32(PCI_BASE_ADDRESS_MEM_TYPE_64)) { > >> - bar++; > >> - *bar &= cpu_to_le32((u32)(mask >> 32)); > >> + if (*vbar & cpu_to_le32(PCI_BASE_ADDRESS_MEM_TYPE_64)) { > >> + vbar++; > >> + *vbar &= cpu_to_le32((u32)(mask >> 32)); > >>i++; > >>} > >>} > >> > >> - bar = (__le32 *)&vdev->vconfig[PCI_ROM_ADDRESS]; > >> + vbar = (__le32 *)&vdev->vconfig[PCI_ROM_ADDRESS]; > >> > >>/* > >> * NB. REGION_INFO will have reported zero size if we weren't able > >> @@ -483,14 +485,14 @@ static void vfio_bar_fixup(struct vfio_pci_device > >> *vdev) > >>if (pci_resource_start(pdev, PCI_ROM_RESOURCE)) { > >>mask = ~
Re: [PATCH 1/1] sched/eas: introduce system-wide overutil indicator
On Thu, 19 Sep 2019 at 09:20, YT Chang wrote: > > When the system is overutilization, the load-balance crossing s/overutilization/overutilized/ > clusters will be triggered and scheduler will not use energy > aware scheduling to choose CPUs. > > The overutilization means the loading of ANY CPUs s/ANY/any/ > exceeds threshold (80%). > > However, only 1 heavy task or while-1 program will run on highest > capacity CPUs and it still result to trigger overutilization. So > the system will not use Energy Aware scheduling. > > To avoid it, a system-wide over-utilization indicator to trigger > load-balance cross clusters. The current rd->overutilized is already system wide. I mean that as soon as one CPU is overutilized, the whole system is considered as overutilized whereas you would like a finer grain level of overutilization. I remember a patch that was proposing a per sched_domain overutilization detection. The load_balance at one sched_domain level was enabled only if the child level was not able to handle the overutilization and the energy aware scheduling was still used in the other sched_domain > > The policy is: > The loading of "ALL CPUs in the highest capacity" > exceeds threshold(80%) or > The loading of "Any CPUs not in the highest capacity" > exceed threshold(80%) Do you have UCs or figures that show a benefit with this change ? 
> > Signed-off-by: YT Chang > --- > kernel/sched/fair.c | 76 > + > 1 file changed, 65 insertions(+), 11 deletions(-) > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > index 036be95..f4c3d70 100644 > --- a/kernel/sched/fair.c > +++ b/kernel/sched/fair.c > @@ -5182,10 +5182,71 @@ static inline bool cpu_overutilized(int cpu) > static inline void update_overutilized_status(struct rq *rq) > { > if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu)) { > - WRITE_ONCE(rq->rd->overutilized, SG_OVERUTILIZED); > - trace_sched_overutilized_tp(rq->rd, SG_OVERUTILIZED); > + if (capacity_orig_of(cpu_of(rq)) < rq->rd->max_cpu_capacity) { > + WRITE_ONCE(rq->rd->overutilized, SG_OVERUTILIZED); > + trace_sched_overutilized_tp(rq->rd, SG_OVERUTILIZED); > + } > } > } > + > +static > +void update_system_overutilized(struct sched_domain *sd, struct cpumask > *cpus) > +{ > + unsigned long group_util; > + bool intra_overutil = false; > + unsigned long max_capacity; > + struct sched_group *group = sd->groups; > + struct root_domain *rd; > + int this_cpu; > + bool overutilized; > + int i; > + > + this_cpu = smp_processor_id(); > + rd = cpu_rq(this_cpu)->rd; > + overutilized = READ_ONCE(rd->overutilized); > + max_capacity = rd->max_cpu_capacity; > + > + do { > + group_util = 0; > + for_each_cpu_and(i, sched_group_span(group), cpus) { > + group_util += cpu_util(i); > + if (cpu_overutilized(i)) { > + if (capacity_orig_of(i) < max_capacity) { > + intra_overutil = true; > + break; > + } > + } > + } > + > + /* > +* A capacity base hint for over-utilization. 
> +* Not to trigger system overutiled if heavy tasks > +* in Big.cluster, so > +* add the free room(20%) of Big.cluster is impacted which > means > +* system-wide over-utilization, > +* that considers whole cluster not single cpu > +*/ > + if (group->group_weight > 1 && (group->sgc->capacity * 1024 < > + group_util * > capacity_margin)) { > + intra_overutil = true; > + break; > + } > + > + group = group->next; > + > + } while (group != sd->groups && !intra_overutil); > + > + if (overutilized != intra_overutil) { > + if (intra_overutil == true) { > + WRITE_ONCE(rd->overutilized, SG_OVERUTILIZED); > + trace_sched_overutilized_tp(rd, SG_OVERUTILIZED); > + } else { > + WRITE_ONCE(rd->overutilized, 0); > + trace_sched_overutilized_tp(rd, 0); > + } > + } > +} > + > #else > static inline void update_overutilized_status(struct rq *rq) { } > #endif > @@ -8242,15 +8303,6 @@ static inline void update_sd_lb_stats(struct lb_env > *env, struct sd_lb_stats *sd > > /* update overload indicator if we are at root domain */
Re: [PATCH v7 00/13] Device property improvements, add %pfw format specifier
On Wed, Sep 18, 2019 at 3:34 PM Sakari Ailus wrote: > > Hi all, > > This set adds functionality into the device property API (counting a > node's parents as well as obtaining its name) in order to support printing > fwnode names using a new conversion specifier "%pfw". The names that are > produced are equivalent to its OF counterpart "%pOF" on OF systems for the > two supported modifiers ("f" and "P"). > > Printing a node's name is something that's been available on OF for a long > time and if something is converted to device property API (such as the > V4L2 fwnode framework) it always got removed of a nice feature that was > sometimes essential in debugging. With this set, that no longer is the > case. > > Note: the set now depends on 2d44d165e939 ("scsi: lpfc: Convert existing > %pf users to %ps") that is expected from the linux-scsi tree. It is OK, so I will be expecting a refresh of the series when the above commit appears in the mainline. Cheers, Rafael
Re: [PATCH v3 06/26] s390/pci: Use PCI_STD_NUM_BARS
On Wed, Sep 18, 2019 at 05:26:59PM +0300, Denis Efremov wrote: > On 9/18/19 11:58 AM, Andrew Murray wrote: > > On Mon, Sep 16, 2019 at 11:41:38PM +0300, Denis Efremov wrote: > >> Remove local definition PCI_BAR_COUNT for the number of PCI BARs and use > >> global one PCI_STD_NUM_BARS instead. > >> > >> Acked-by: Sebastian Ott > >> Cc: Gerald Schaefer > >> Signed-off-by: Denis Efremov > >> --- > >> arch/s390/include/asm/pci.h | 5 + > >> arch/s390/include/asm/pci_clp.h | 6 +++--- > >> arch/s390/pci/pci.c | 16 > >> arch/s390/pci/pci_clp.c | 6 +++--- > >> 4 files changed, 15 insertions(+), 18 deletions(-) > >> > >> diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h > >> index a2399eff84ca..3a06c264ea53 100644 > >> --- a/arch/s390/include/asm/pci.h > >> +++ b/arch/s390/include/asm/pci.h > >> @@ -2,9 +2,6 @@ > >> #ifndef __ASM_S390_PCI_H > >> #define __ASM_S390_PCI_H > >> > >> -/* must be set before including pci_clp.h */ > >> -#define PCI_BAR_COUNT 6 > >> - > >> #include > >> #include > >> #include > >> @@ -138,7 +135,7 @@ struct zpci_dev { > >> > >>char res_name[16]; > >>bool mio_capable; > >> - struct zpci_bar_struct bars[PCI_BAR_COUNT]; > >> + struct zpci_bar_struct bars[PCI_STD_NUM_BARS]; > >> > >>u64 start_dma; /* Start of available DMA addresses */ > >>u64 end_dma;/* End of available DMA addresses */ > >> diff --git a/arch/s390/include/asm/pci_clp.h > >> b/arch/s390/include/asm/pci_clp.h > >> index 50359172cc48..bd2cb4ea7d93 100644 > >> --- a/arch/s390/include/asm/pci_clp.h > >> +++ b/arch/s390/include/asm/pci_clp.h > >> @@ -77,7 +77,7 @@ struct mio_info { > >>struct { > >>u64 wb; > >>u64 wt; > >> - } addr[PCI_BAR_COUNT]; > >> + } addr[PCI_STD_NUM_BARS]; > >>u32 reserved[6]; > >> } __packed; > >> > >> @@ -98,9 +98,9 @@ struct clp_rsp_query_pci { > >>u16 util_str_avail : 1; /* utility string available? 
*/ > >>u16 pfgid : 8; /* pci function group id */ > >>u32 fid;/* pci function id */ > >> - u8 bar_size[PCI_BAR_COUNT]; > >> + u8 bar_size[PCI_STD_NUM_BARS]; > >>u16 pchid; > >> - __le32 bar[PCI_BAR_COUNT]; > >> + __le32 bar[PCI_STD_NUM_BARS]; > >>u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ > >>u32 : 16; > >>u8 fmb_len; > >> diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c > >> index b0e3b9a0e488..aca372c8e34f 100644 > >> --- a/arch/s390/pci/pci.c > >> +++ b/arch/s390/pci/pci.c > >> @@ -43,7 +43,7 @@ static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES); > >> static DEFINE_SPINLOCK(zpci_domain_lock); > >> > >> #define ZPCI_IOMAP_ENTRIES > >> \ > >> - min(((unsigned long) ZPCI_NR_DEVICES * PCI_BAR_COUNT / 2), \ > >> + min(((unsigned long) ZPCI_NR_DEVICES * PCI_STD_NUM_BARS / 2), \ > >>ZPCI_IOMAP_MAX_ENTRIES) > >> > >> static DEFINE_SPINLOCK(zpci_iomap_lock); > >> @@ -294,7 +294,7 @@ static void __iomem *pci_iomap_range_mio(struct > >> pci_dev *pdev, int bar, > >> void __iomem *pci_iomap_range(struct pci_dev *pdev, int bar, > >> unsigned long offset, unsigned long max) > >> { > >> - if (!pci_resource_len(pdev, bar) || bar >= PCI_BAR_COUNT) > >> + if (bar >= PCI_STD_NUM_BARS || !pci_resource_len(pdev, bar)) > >>return NULL; > >> > >>if (static_branch_likely(&have_mio)) > >> @@ -324,7 +324,7 @@ static void __iomem *pci_iomap_wc_range_mio(struct > >> pci_dev *pdev, int bar, > >> void __iomem *pci_iomap_wc_range(struct pci_dev *pdev, int bar, > >> unsigned long offset, unsigned long max) > >> { > >> - if (!pci_resource_len(pdev, bar) || bar >= PCI_BAR_COUNT) > >> + if (bar >= PCI_STD_NUM_BARS || !pci_resource_len(pdev, bar)) > >>return NULL; > > > > This looks like a latent bug fix here. If 'bar' is out of range we return > > NULL instead accessing an invalid item of an array. Should this not be > > a separate patch and tagged as stable? 
> > > > This fix was suggested by Bjorn in v1 review: > https://lkml.org/lkml/2019/8/12/997 > Ah yes, apologies - I'll re-read the previous threads next time. Thanks, Andrew Murray > > > Thanks, > > > > Andrew Murray > > > >> > >>if (static_branch_likely(&have_mio)) > >> @@ -416,7 +416,7 @@ static void zpci_map_resources(struct pci_dev *pdev) > >>resource_size_t len; > >>int i; > >> > >> - for (i = 0; i < PCI_BAR_COUNT; i++) { > >> + for (i = 0; i < PCI_STD_NUM_BARS; i++) { > >>len = pci_resource_len(pdev, i); > >>if (!len) > >>continue; > >> @@ -451,7 +451,7 @@ static void zpci_unmap_resources(struct pci_dev *pdev) > >>if (zpci_use_mio(zdev)) > >>return; > >> > >> - for (i = 0; i < PCI_BAR_COUNT; i++) { > >> + for (i = 0
Re: [PATCH v2 2/2] reset: Reset controller driver for Intel LGM SoC
Hi Martin, On 9/12/2019 2:38 PM, Dilip Kota wrote: Re-sending the mail, because of delivery failure. sorry for the spam. Hi Martin, On 9/6/2019 4:53 AM, Martin Blumenstingl wrote: Hi, On Thu, Sep 5, 2019 at 4:38 AM Chuan Hua, Lei wrote: [...] I'm not surprised that we got some of the IP block layout for the VRX200 RCU "wrong" - all "documentation" we have is the old Lantiq UGW (BSP). with proper documentation (as in a "public datasheet for the SoC") it would be easy to spot these mistakes (at least I assume that the quality of the Infineon / Lantiq datasheets is excellent). back to reset-intel-syscon: assigning only one job to the RCU hardware is a good idea (in my opinion). that brings up a question: why do we need the "syscon" compatible for the RCU node? this is typically used when registers are accessed by another IP block and the other driver has to access these registers as well. does this mean that there's more hidden in the RCU registers? As I mentioned, some other misc registers are put into RCU even they don't belong to reset functions. OK, just be aware that there are also rules for syscon compatible drivers, see for example: [0] if Rob (dt-bindings maintainer) is happy with the documentation in patch 1 then I'm fine with it as well. for my own education I would appreciate if you could describe these "other misc registers" with a few sentences (I assume that this can also help Rob) For LGM, RCU is clean. There would be no MISC register after software's feedback. These misc registers will be moved to chiptop/misc groups(implemented by syscon). For legacy SoC, we do have a lot MISC registers for different SoCs. OK, I think I understand now: chiptop != RCU so RCU really only has one purpose: handling resets while chiptop manages all the random bits does this means we don't need RCU to match "syscon"? If we don't support legacy SoC with the same driver, we don't need syscon, just regmap. 
Regmap is a must for us since we will use regmap proxy to implement secure rest via secure processor. I think we should drop the syscon compatible for LGM then even for the legacy SoCs the reset controller should not have a syscon compatible: instead it should have a syscon parent (as the current "lantiq,xrx200-reset" binding requires and as suggested by Rob for another IP block: [0]) I am not sure if syscon parent really matches hardware implementation. In all our Networking SoCs, chiptop is kind of misc register collection. Some registers can't belong to any particular group, or they need to work together with other modules(therefore, these misc registers would be accessed by two or more modules). However, chiptop is not a hardware module. indeed, chiptop should not have any child nodes (based on your explanation). I was referring to VRX200 where the RCU syscon has various children (one child node for each hardware module that's part of RCU: reset controller, 2x USB PHY, ...) back to LGM: you said that the LGM RCU registers only contain the reset controller. thus I see no need for the syscon compatible keeping regmap is great in my opinion because it's a nice API and gets rid of some boilerplate even better if it makes things easier for accessing the secure processor [...] 4. Code not optimized and intel internal review not assessed. insights from you (like the issue with the reset callback) are very valuable - this shows that we should focus on having one driver. Based on the above findings, I would suggest reset-lantiq.c to move to reset-intel-syscon.c my concern with having two separate drivers is that it will be hard to migrate from reset-lantiq to the "optimized" reset-intel-syscon driver. I don't have access to the datasheets for the any Lantiq/Intel SoC (VRX200 and even older). 
so debugging issues after switching from one driver to another is tedious because I cannot tell which part of the driver is causing a problem (it's either "all code from driver A" vs "all code from driver B", meaning it's hard to narrow it down). with separate commits/patches that are improving the reset-lantiq driver I can do git bisect to find the cause of a problem on the older SoCs (VRX200 for example) Our internal version supports XRX350/XRX500/PRX300(MIPS based) and latest Lighting Mountain(X86 based). Migration to reset-intel-syscon.c should be straight forward. what about the _reset callback on the XRX350/XRX500/PRX300 SoCs - do they only use level resets (_assert and _deassert) or are some reset lines using reset pulses (_reset)? when we wanted to switch from reset-lantiq.c to reset-intel-syscon.c we still had to add support for the _reset callback as this is missing in reset-intel-syscon.c currently Yes. We have reset pulse(assert, then check the reset status). only now I realized that the reset-intel-syscon driver does not seem to use the status registers (instead it's looking at the reset registers when checking the status). what happened to the
Re: printk meeting at LPC
On Wed, Sep 18, 2019 at 9:42 AM John Ogness wrote: > > On 2019-09-18, Sergey Senozhatsky wrote: > >> For instance, tty/sysrq must be able to switch printk emergency > >> on/off. > > > > How did we come up to that _sync() printk() emergency mode (when we > > make sure that there is no active printing kthread)? We had a number > > of cases (complaints) of lost kernel messages. There are scenarios in > > which we cannot offload to async preemptible printing kthread, because > > current control path is, for instance, going to reboot the kernel. In > > sync printk() mode we have some sort (!) of guarantees that when we do > > > > pr_emerg("Restarting system\n"); > > kmsg_dump(KMSG_DUMP_RESTART); > > machine_restart(cmd); > > > > pr_emerg("Restarting system\n") is going to flush logbuf before the > > system will machine_restart(). > > Yes, this was why I asked Daniel how the bsod stuff will be > implemented. We don't want a bsod just because we are > restarting. Perhaps write_atomic() should also have a "reason" argument > like kmsg_dump does. I will keep in touch with Daniel to make sure we > are sync on this. I thought that's why there'll be the oops_in_progress parameter for write_atomic? For the fbcon/graphics side I think we maybe need three levels: - normal console writes with the kthread - write_atomic, but non-destructive: Just directly write into the framebuffer. Might need a serious locking rework in fbcon to make this possible, plus won't work on drivers where the framebuffer is either not statically pinned, or where you need to take additional work to flush the updates out to the display. - bsod, where we attempt an unfriendly takeover of the display with trylocks and just overwrite what's there to display the oops. that one is probably best suited for kmsg_dump. Cheers, Daniel > > It's going to be a bit harder when we have per-console kthread. > > Each console has its own iterator. 
These iterators will need to advance, > regardless if the message was printed via write() or write_atomic(). > > John Ogness -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch
Re: [PATCH v2] net: dsa: sja1105: prevent leaking memory
From: Vladimir Oltean Date: Wed, 18 Sep 2019 23:00:20 +0300 > Hi Navid, > > Thanks for the patch. > > On 9/18/19 9:04 PM, Navid Emamdoost wrote: >> In sja1105_static_config_upload, in two cases memory is leaked: when >> static_config_buf_prepare_for_upload fails and when sja1105_inhibit_tx >> fails. In both cases config_buf should be released. >> Fixes: 8aa9ebccae876 (avoid leaking config_buf) >> Fixes: 1a4c69406cc1c (avoid leaking config_buf) >> > > You're not supposed to add a short description of the patch here, but > rather the commit message of the patch you're fixing. > Add this to your ~/.gitconfig: > > [pretty] > fixes = Fixes: %h (\"%s\") > > And then run: > git show --pretty=fixes 8aa9ebccae87621d997707e4f25e53fddd7e30e4 > > Fixes: 8aa9ebccae87 ("net: dsa: Introduce driver for NXP SJA1105 > 5-port L2 switch") > > git show --pretty=fixes 1a4c69406cc1c3c42bb7391c8eb544e93fe9b320 > > Fixes: 1a4c69406cc1 ("net: dsa: sja1105: Prevent PHY jabbering during > switch reset") However the Fixes: line should not be broken up like this with newlines.
Re: [PATCH 1/1] sched/eas: introduce system-wide overutil indicator
Hi YT, Thank you for the patch! Yet something to improve: [auto build test ERROR on linus/master] [cannot apply to v5.3 next-20190918] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system. BTW, we also suggest to use '--base' option to specify the base tree in git format-patch, please see https://stackoverflow.com/a/37406982] url: https://github.com/0day-ci/linux/commits/YT-Chang/sched-eas-introduce-system-wide-overutil-indicator/20190919-152213 config: i386-defconfig (attached as .config) compiler: gcc-7 (Debian 7.4.0-13) 7.4.0 reproduce: # save the attached .config to linux build tree make ARCH=i386 If you fix the issue, kindly add following tag Reported-by: kbuild test robot All errors (new ones prefixed by >>): kernel/sched/fair.c: In function 'update_system_overutilized': >> kernel/sched/fair.c:5234:20: error: 'capacity_margin' undeclared (first use >> in this function); did you mean 'capacity_of'? group_util * capacity_margin)) { ^~~ capacity_of kernel/sched/fair.c:5234:20: note: each undeclared identifier is reported only once for each function it appears in vim +5234 kernel/sched/fair.c 5195 5196 static 5197 void update_system_overutilized(struct sched_domain *sd, struct cpumask *cpus) 5198 { 5199 unsigned long group_util; 5200 bool intra_overutil = false; 5201 unsigned long max_capacity; 5202 struct sched_group *group = sd->groups; 5203 struct root_domain *rd; 5204 int this_cpu; 5205 bool overutilized; 5206 int i; 5207 5208 this_cpu = smp_processor_id(); 5209 rd = cpu_rq(this_cpu)->rd; 5210 overutilized = READ_ONCE(rd->overutilized); 5211 max_capacity = rd->max_cpu_capacity; 5212 5213 do { 5214 group_util = 0; 5215 for_each_cpu_and(i, sched_group_span(group), cpus) { 5216 group_util += cpu_util(i); 5217 if (cpu_overutilized(i)) { 5218 if (capacity_orig_of(i) < max_capacity) { 5219 intra_overutil = true; 5220 break; 5221 } 5222 } 5223 } 5224 5225 /* 5226 * A capacity base hint for over-utilization. 
5227 * Not to trigger system overutiled if heavy tasks 5228 * in Big.cluster, so 5229 * add the free room(20%) of Big.cluster is impacted which means 5230 * system-wide over-utilization, 5231 * that considers whole cluster not single cpu 5232 */ 5233 if (group->group_weight > 1 && (group->sgc->capacity * 1024 < > 5234 group_util * > capacity_margin)) { 5235 intra_overutil = true; 5236 break; 5237 } 5238 5239 group = group->next; 5240 5241 } while (group != sd->groups && !intra_overutil); 5242 5243 if (overutilized != intra_overutil) { 5244 if (intra_overutil == true) { 5245 WRITE_ONCE(rd->overutilized, SG_OVERUTILIZED); 5246 trace_sched_overutilized_tp(rd, SG_OVERUTILIZED); 5247 } else { 5248 WRITE_ONCE(rd->overutilized, 0); 5249 trace_sched_overutilized_tp(rd, 0); 5250 } 5251 } 5252 } 5253 --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation .config.gz Description: application/gzip
Re: [PATCH v3] bonding: force enable lacp port after link state recovery for 802.3ad
zhangsha (A) wrote: >> -Original Message- >> From: zhangsha (A) >> Sent: 2019年9月18日 21:06 >> To: jay.vosbu...@canonical.com; vfal...@gmail.com; a...@greyhouse.net; >> da...@davemloft.net; net...@vger.kernel.org; linux-kernel@vger.kernel.org; >> yuehaibing ; hunongda ; >> Chenzhendong (alex) ; zhangsha (A) >> >> Subject: [PATCH v3] bonding: force enable lacp port after link state >> recovery for >> 802.3ad >> >> From: Sha Zhang >> >> After the commit 334031219a84 ("bonding/802.3ad: fix slave link >> initialization >> transition states") merged, the slave's link status will be changed to >> BOND_LINK_FAIL from BOND_LINK_DOWN in the following scenario: >> - Driver reports loss of carrier and >> bonding driver receives NETDEV_DOWN notifier >> - slave's duplex and speed is zerod and >> its port->is_enabled is cleard to 'false'; >> - Driver reports link recovery and >> bonding driver receives NETDEV_UP notifier; >> - If speed/duplex getting failed here, the link status >> will be changed to BOND_LINK_FAIL; >> - The MII monotor later recover the slave's speed/duplex >> and set link status to BOND_LINK_UP, but remains >> the 'port->is_enabled' to 'false'. >> >> In this scenario, the lacp port will not be enabled even its speed and >> duplex are >> valid. The bond will not send LACPDU's, and its state is 'AD_STATE_DEFAULTED' >> forever. The simplest fix I think is to call bond_3ad_handle_link_change() in >> bond_miimon_commit, this function can enable lacp after port slave speed >> check. >> As enabled, the lacp port can run its state machine normally after link >> recovery. 
>> >> Signed-off-by: Sha Zhang >> --- >> drivers/net/bonding/bond_main.c | 3 ++- >> 1 file changed, 2 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/net/bonding/bond_main.c >> b/drivers/net/bonding/bond_main.c index 931d9d9..76324a5 100644 >> --- a/drivers/net/bonding/bond_main.c >> +++ b/drivers/net/bonding/bond_main.c >> @@ -2206,7 +2206,8 @@ static void bond_miimon_commit(struct bonding >> *bond) >> */ >> if (BOND_MODE(bond) == BOND_MODE_8023AD && >> slave->link == BOND_LINK_UP) >> - >> bond_3ad_adapter_speed_duplex_changed(slave); >> +bond_3ad_handle_link_change(slave, >> +BOND_LINK_UP); >> continue; >> >> case BOND_LINK_UP: > >Hi, David, >I have replied your email for a while, I guess you may miss my email, so I >resend it. >The following link address is the last email, please review the new one again, >thank you. >https://patchwork.ozlabs.org/patch/1151915/ > >Last time, you doubted this is a driver specific problem, >I prefer to believe it's not because I find the commit 4d2c0cda, >its log says " Some NIC drivers don't have correct speed/duplex >settings at the time they send NETDEV_UP notification ...". > >Anyway, I think the lacp status should be fixed correctly, >since link-monitoring (miimon) set SPEED/DUPLEX right here. I suspect this is going to be related to the concurrent discussion with Aleksei, and would like to see the instrumentation results from his tests before adding another change to attempt to resolve this. Also, what device are you using for your testing, and are you able to run the instrumentation patch that I provided to Aleksei and provide its results? -J --- -Jay Vosburgh, jay.vosbu...@canonical.com
Re: [PATCH v3] net: dsa: sja1105: prevent leaking memory
From: Navid Emamdoost Date: Wed, 18 Sep 2019 15:34:06 -0500 > Fixes: 8aa9ebccae87 ("net: dsa: Introduce driver for NXP SJA1105 5-port > L2 switch") > > Fixes: 1a4c69406cc1 ("net: dsa: sja1105: Prevent PHY jabbering during > switch reset") Please: 1) Do not break Fixes: tags into multiple lines, that way the string is easily greppable. 2) Do not separate the Fixes: from other tags with newlines. It is just another tag like Signed-off-by: and Acked-by: Thanks.
[v5 1/2] dt/bindings: clk: Add YAML schemas for LS1028A Display Clock bindings
LS1028A has a clock domain PXLCLK0 used for provide pixel clocks to Display output interface. Add a YAML schema for this. Signed-off-by: Wen He Reviewed-by: Rob Herring --- .../devicetree/bindings/clock/fsl,plldig.yaml | 43 +++ 1 file changed, 43 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/fsl,plldig.yaml diff --git a/Documentation/devicetree/bindings/clock/fsl,plldig.yaml b/Documentation/devicetree/bindings/clock/fsl,plldig.yaml new file mode 100644 index ..32274e94aafc --- /dev/null +++ b/Documentation/devicetree/bindings/clock/fsl,plldig.yaml @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/bindings/clock/fsl,plldig.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NXP QorIQ Layerscape LS1028A Display PIXEL Clock Binding + +maintainers: + - Wen He + +description: | + NXP LS1028A has a clock domain PXLCLK0 used for the Display output + interface in the display core, as implemented in TSMC CLN28HPM PLL. + which generate and offers pixel clocks to Display. + +properties: + compatible: +const: fsl,ls1028a-plldig + + reg: +maxItems: 1 + + '#clock-cells': +const: 0 + +required: + - compatible + - reg + - clocks + - '#clock-cells' + +examples: + # Display PIXEL Clock node: + - | +dpclk: clock-display@f1f { +compatible = "fsl,ls1028a-plldig"; +reg = <0x0 0xf1f 0x0 0x>; +#clock-cells = <0>; +clocks = <&osc_27m>; +}; + +... -- 2.17.1
[v5 2/2] clk: ls1028a: Add clock driver for Display output interface
Add clock driver for QorIQ LS1028A Display output interfaces(LCD, DPHY), as implemented in TSMC CLN28HPM PLL, this PLL supports the programmable integer division and range of the display output pixel clock's 27-594MHz. Signed-off-by: Wen He --- change in v5: - update some code according to the maintainer review feedback. drivers/clk/Kconfig | 10 ++ drivers/clk/Makefile | 1 + drivers/clk/clk-plldig.c | 289 +++ 3 files changed, 300 insertions(+) create mode 100644 drivers/clk/clk-plldig.c diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index 801fa1cd0321..0895000c392a 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -223,6 +223,16 @@ config CLK_QORIQ This adds the clock driver support for Freescale QorIQ platforms using common clock framework. +config CLK_LS1028A_PLLDIG +tristate "Clock driver for LS1028A Display output" +depends on ARCH_LAYERSCAPE || COMPILE_TEST +default ARCH_LAYERSCAPE +help + This driver support the Display output interfaces(LCD, DPHY) pixel clocks + of the QorIQ Layerscape LS1028A, as implemented TSMC CLN28HPM PLL. Not all + features of the PLL are currently supported by the driver. By default, + configured bypass mode with this PLL.
+ config COMMON_CLK_XGENE bool "Clock driver for APM XGene SoC" default ARCH_XGENE diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index 0138fb14e6f8..d23b7464aba8 100644 --- a/drivers/clk/Makefile +++ b/drivers/clk/Makefile @@ -45,6 +45,7 @@ obj-$(CONFIG_COMMON_CLK_OXNAS)+= clk-oxnas.o obj-$(CONFIG_COMMON_CLK_PALMAS)+= clk-palmas.o obj-$(CONFIG_COMMON_CLK_PWM) += clk-pwm.o obj-$(CONFIG_CLK_QORIQ)+= clk-qoriq.o +obj-$(CONFIG_CLK_LS1028A_PLLDIG) += clk-plldig.o obj-$(CONFIG_COMMON_CLK_RK808) += clk-rk808.o obj-$(CONFIG_COMMON_CLK_HI655X)+= clk-hi655x.o obj-$(CONFIG_COMMON_CLK_S2MPS11) += clk-s2mps11.o diff --git a/drivers/clk/clk-plldig.c b/drivers/clk/clk-plldig.c new file mode 100644 index ..e7fceeae0533 --- /dev/null +++ b/drivers/clk/clk-plldig.c @@ -0,0 +1,289 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 NXP + * + * Clock driver for LS1028A Display output interfaces(LCD, DPHY). + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* PLLDIG register offsets and bit masks */ +#define PLLDIG_REG_PLLSR0x24 +#define PLLDIG_REG_PLLDV0x28 +#define PLLDIG_REG_PLLFM0x2c +#define PLLDIG_REG_PLLFD0x30 +#define PLLDIG_REG_PLLCAL1 0x38 +#define PLLDIG_REG_PLLCAL2 0x3c +#define PLLDIG_LOCK_MASKBIT(2) +#define PLLDIG_REG_FIELD_SSCGBYPBIT(30) +#define PLLDIG_REG_FIELD_FDEN BIT(30) +#define PLLDIG_REG_FIELD_DTHDIS GENMASK(17, 16) +#define PLLDIG_REG_FIELD_MULT GENMASK(7, 0) +#define PLLDIG_REG_FIELD_RFDPHI1GENMASK(30, 25) + +/* Minimum output clock frequency, in Hz */ +#define PHI1_MIN_FREQ 2700 + +/* Maximum output clock frequency, in Hz */ +#define PHI1_MAX_FREQ 6 + +/* Maximum of the divider */ +#define MAX_RFDPHI1 63 + +/* Best value of multiplication factor divider */ +#define PLLDIG_DEFAULE_MULT 44 + +/* + * Clock configuration relationship between the PHI1 frequency(fpll_phi) and + * the output frequency of the PLL is determined by the PLLDV, according to + * the 
following equation: + * fpll_phi = (pll_ref * mfd) / div_rfdphi1 + */ +struct plldig_phi1_param { + unsigned long rate; + unsigned int rfdphi1; + unsigned int mfd; +}; + +static const struct clk_parent_data parent_data[] = { + {.index = 0}, +}; + +struct clk_plldig { + struct clk_hw hw; + void __iomem *regs; +}; + +#define to_clk_plldig(_hw) container_of(_hw, struct clk_plldig, hw) + +static int plldig_enable(struct clk_hw *hw) +{ + struct clk_plldig *data = to_clk_plldig(hw); + u32 val; + + val = readl(data->regs + PLLDIG_REG_PLLFM); + /* +* Use Bypass mode with PLL off by default, the frequency overshoot +* detector output was disable. SSCG Bypass mode should be enable. +*/ + val |= PLLDIG_REG_FIELD_SSCGBYP; + writel(val, data->regs + PLLDIG_REG_PLLFM); + + val = readl(data->regs + PLLDIG_REG_PLLFD); + /* Disable dither and Sigma delta modulation in bypass mode */ + val |= FIELD_PREP(PLLDIG_REG_FIELD_FDEN, 0x1) | + FIELD_PREP(PLLDIG_REG_FIELD_DTHDIS, 0x3); + + writel(val, data->regs + PLLDIG_REG_PLLFD); + + return 0; +} + +static void plldig_disable(struct clk_hw *hw) +{ + struct clk_plldig *data = to_clk_plldig(hw); + u32 val; + + val = readl(data->regs + PLLDIG_REG_PLLFM); + + val &= ~PLLDIG_RE
Re: [PATCH] ACPICA: make acpi_load_table() return table index
On Thursday, September 12, 2019 10:07:42 AM CEST Nikolaus Voss wrote: > For unloading an ACPI table, it is necessary to provide the > index of the table. The method intended for dynamically > loading or hotplug addition of tables, acpi_load_table(), > should provide this information via an optional pointer > to the loaded table index. > > This patch fixes the table unload function of acpi_configfs. > > Reported-by: Andy Shevchenko > Fixes: d06c47e3dd07f ("ACPI: configfs: Resolve objects on host-directed table > loads") > Signed-off-by: Nikolaus Voss Overall, I think that something similar to this patch will be needed, but please don't change the acpi_load_table() signature. Instead, define it as a wrapper around a new function called, say, acpi_load_table_with_index() that will take two arguments, like acpi_load_table() in your patch. Then, you'd only need to call acpi_load_table_with_index() directly from acpi_table_aml_write(). In that case, IMO, it will be easier to handle the divergence between the upstream ACPICA and the kernel in the future in case the upstream doesn't decide to incorporate your change. 
> --- > drivers/acpi/acpi_configfs.c | 2 +- > drivers/acpi/acpica/dbfileio.c | 2 +- > drivers/acpi/acpica/tbxfload.c | 8 ++-- > drivers/firmware/efi/efi.c | 2 +- > include/acpi/acpixf.h | 3 ++- > 5 files changed, 11 insertions(+), 6 deletions(-) > > diff --git a/drivers/acpi/acpi_configfs.c b/drivers/acpi/acpi_configfs.c > index 57d9d574d4dde..77f81242a28e6 100644 > --- a/drivers/acpi/acpi_configfs.c > +++ b/drivers/acpi/acpi_configfs.c > @@ -53,7 +53,7 @@ static ssize_t acpi_table_aml_write(struct config_item *cfg, > if (!table->header) > return -ENOMEM; > > - ret = acpi_load_table(table->header); > + ret = acpi_load_table(table->header, &table->index); > if (ret) { > kfree(table->header); > table->header = NULL; > diff --git a/drivers/acpi/acpica/dbfileio.c b/drivers/acpi/acpica/dbfileio.c > index c6e25734dc5cd..e1b6e54a96ac1 100644 > --- a/drivers/acpi/acpica/dbfileio.c > +++ b/drivers/acpi/acpica/dbfileio.c > @@ -93,7 +93,7 @@ acpi_status acpi_db_load_tables(struct acpi_new_table_desc > *list_head) > while (table_list_head) { > table = table_list_head->table; > > - status = acpi_load_table(table); > + status = acpi_load_table(table, NULL); > if (ACPI_FAILURE(status)) { > if (status == AE_ALREADY_EXISTS) { > acpi_os_printf > diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c > index 86f1693f6d29a..d08cd8ffcbdb6 100644 > --- a/drivers/acpi/acpica/tbxfload.c > +++ b/drivers/acpi/acpica/tbxfload.c > @@ -268,7 +268,8 @@ ACPI_EXPORT_SYMBOL_INIT(acpi_install_table) > * > * PARAMETERS: table - Pointer to a buffer containing the ACPI > *table to be loaded. > - * > + * table_idx - Pointer to a u32 for storing the table > + *index, might be NULL > * RETURN: Status > * > * DESCRIPTION: Dynamically load an ACPI table from the caller's buffer. Must > @@ -278,7 +279,7 @@ ACPI_EXPORT_SYMBOL_INIT(acpi_install_table) > * to ensure that the table is not deleted or unmapped. 
> * > > **/ > -acpi_status acpi_load_table(struct acpi_table_header *table) > +acpi_status acpi_load_table(struct acpi_table_header *table, u32 *table_idx) > { > acpi_status status; > u32 table_index; > @@ -297,6 +298,9 @@ acpi_status acpi_load_table(struct acpi_table_header > *table) > status = acpi_tb_install_and_load_table(ACPI_PTR_TO_PHYSADDR(table), > > ACPI_TABLE_ORIGIN_EXTERNAL_VIRTUAL, > FALSE, &table_index); > + if (table_idx) > + *table_idx = table_index; > + > if (ACPI_SUCCESS(status)) { > > /* Complete the initialization/resolution of new objects */ > diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c > index ad3b1f4866b35..9773e4212baef 100644 > --- a/drivers/firmware/efi/efi.c > +++ b/drivers/firmware/efi/efi.c > @@ -308,7 +308,7 @@ static __init int efivar_ssdt_load(void) > goto free_data; > } > > - ret = acpi_load_table(data); > + ret = acpi_load_table(data, NULL); > if (ret) { > pr_err("failed to load table: %d\n", ret); > goto free_data; > diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h > index 3845c8fcc94e5..c90bbdc4146a6 100644 > --- a/include/acpi/acpixf.h > +++ b/include/acpi/acpixf.h > @@ -452,7 +452,8 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION >
Re: [PATCH v2] net: dsa: sja1105: prevent leaking memory
On Thu, 19 Sep 2019 at 11:11, David Miller wrote: > > From: Vladimir Oltean > Date: Wed, 18 Sep 2019 23:00:20 +0300 > > > Hi Navid, > > > > Thanks for the patch. > > > > On 9/18/19 9:04 PM, Navid Emamdoost wrote: > >> In sja1105_static_config_upload, in two cases memory is leaked: when > >> static_config_buf_prepare_for_upload fails and when sja1105_inhibit_tx > >> fails. In both cases config_buf should be released. > >> Fixes: 8aa9ebccae876 (avoid leaking config_buf) > >> Fixes: 1a4c69406cc1c (avoid leaking config_buf) > >> > > > > You're not supposed to add a short description of the patch here, but > > rather the commit message of the patch you're fixing. > > Add this to your ~/.gitconfig: > > > > [pretty] > > fixes = Fixes: %h (\"%s\") > > > > And then run: > > git show --pretty=fixes 8aa9ebccae87621d997707e4f25e53fddd7e30e4 > > > > Fixes: 8aa9ebccae87 ("net: dsa: Introduce driver for NXP SJA1105 > > 5-port L2 switch") > > > > git show --pretty=fixes 1a4c69406cc1c3c42bb7391c8eb544e93fe9b320 > > > > Fixes: 1a4c69406cc1 ("net: dsa: sja1105: Prevent PHY jabbering during > > switch reset") > > However the Fixes: line should not be broken up like this with newlines. Sorry, my mail client did that automatically. -Vladimir
Re: [PATCH v2] media: vimc: Implement debayer control for mean window size
On 9/18/19 1:12 AM, Shuah Khan wrote: > On 9/17/19 4:53 PM, Arthur Moraes do Lago wrote: >> Add mean window size parameter for debayer filter as a control in >> vimc-debayer. >> >> vimc-debayer was patched to allow changing mean windows parameter >> of the filter without needing to reload the driver. The parameter >> can now be set using a v4l2-ctl control(mean_window_size). >> >> Co-developed-by: Laís Pessine do Carmo >> Signed-off-by: Laís Pessine do Carmo >> Signed-off-by: Arthur Moraes do Lago >> >> Small fixes to vimc debayer mean window size patch >> --- >> Changes in V2: >> - Updated documentation >> - Added v4l2_subev_core_ops to solve errors in v4l2-ctl compliance test >> - Changed control naming to follow English capitalization rules >> - Rebased to Shuah Khan's newest patch series 171283 >> ("Collapse vimc single monolithic driver") >> - Change maximum value for mean window size >> >> We did run streaming tests, the value of 1 for mean window size has a >> nice side effect of not applying any filter, so we can see the original >> pattern. The value of 99 for mean window size worked, but was very slow. >> We wanted to find a way to show that higher values for mean window size >> yielded very similar results to just blurring the image, but we could >> not find any way to show this, so we just made the maximum value 25, >> which runs faster, but is still probably a little high. 
>> --- >> Documentation/media/v4l-drivers/vimc.rst | 10 +-- >> drivers/media/platform/vimc/vimc-common.h | 1 + >> drivers/media/platform/vimc/vimc-debayer.c | 89 +++--- >> 3 files changed, 79 insertions(+), 21 deletions(-) >> >> diff --git a/Documentation/media/v4l-drivers/vimc.rst >> b/Documentation/media/v4l-drivers/vimc.rst >> index a582af0509ee..91c909904b87 100644 >> --- a/Documentation/media/v4l-drivers/vimc.rst >> +++ b/Documentation/media/v4l-drivers/vimc.rst >> @@ -80,9 +80,7 @@ vimc-capture: >> Module options >> --- >> -Vimc has a few module parameters to configure the driver. >> - >> - param=value >> +Vimc has a module parameters to configure the driver. > > parameter > >> * ``sca_mult=`` >> @@ -91,12 +89,6 @@ Vimc has a few module parameters to configure the >> driver. >> original one. Currently, only supports scaling up (the default >> value >> is 3). >> -* ``deb_mean_win_size=`` >> - >> - Window size to calculate the mean. Note: the window size needs to >> be an >> - odd number, as the main pixel stays in the center of the window, >> - otherwise the next odd number is considered (the default value is >> 3). 
>> - >> Source code documentation >> - >> diff --git a/drivers/media/platform/vimc/vimc-common.h >> b/drivers/media/platform/vimc/vimc-common.h >> index 236412ad7548..3a5102ddf794 100644 >> --- a/drivers/media/platform/vimc/vimc-common.h >> +++ b/drivers/media/platform/vimc/vimc-common.h >> @@ -19,6 +19,7 @@ >> #define VIMC_CID_VIMC_BASE (0x00f0 | 0xf000) >> #define VIMC_CID_VIMC_CLASS (0x00f0 | 1) >> #define VIMC_CID_TEST_PATTERN (VIMC_CID_VIMC_BASE + 0) >> +#define VIMC_CID_MEAN_WIN_SIZE (VIMC_CID_VIMC_BASE + 1) >> #define VIMC_FRAME_MAX_WIDTH 4096 >> #define VIMC_FRAME_MAX_HEIGHT 2160 >> diff --git a/drivers/media/platform/vimc/vimc-debayer.c >> b/drivers/media/platform/vimc/vimc-debayer.c >> index 37f3767db469..76df2ac110c0 100644 >> --- a/drivers/media/platform/vimc/vimc-debayer.c >> +++ b/drivers/media/platform/vimc/vimc-debayer.c >> @@ -11,17 +11,12 @@ >> #include >> #include >> #include >> +#include >> +#include >> #include >> #include "vimc-common.h" >> -static unsigned int deb_mean_win_size = 3; >> -module_param(deb_mean_win_size, uint, ); >> -MODULE_PARM_DESC(deb_mean_win_size, " the window size to calculate the >> mean.\n" >> - "NOTE: the window size needs to be an odd number, as the main pixel " >> - "stays in the center of the window, otherwise the next odd number " >> - "is considered"); >> - >> enum vimc_deb_rgb_colors { >> VIMC_DEB_RED = 0, >> VIMC_DEB_GREEN = 1, >> @@ -46,6 +41,8 @@ struct vimc_deb_device { >> u8 *src_frame; >> const struct vimc_deb_pix_map *sink_pix_map; >> unsigned int sink_bpp; >> + unsigned int mean_win_size; >> + struct v4l2_ctrl_handler hdl; >> }; >> static const struct v4l2_mbus_framefmt sink_fmt_default = { >> @@ -346,11 +343,18 @@ static int vimc_deb_s_stream(struct v4l2_subdev *sd, >> int enable) >> return 0; >> } >> +static const struct v4l2_subdev_core_ops vimc_deb_core_ops = { >> + .log_status = v4l2_ctrl_subdev_log_status, >> + .subscribe_event = v4l2_ctrl_subdev_subscribe_event, >> + .unsubscribe_event = 
v4l2_event_subdev_unsubscribe, >> +}; >> + >> static const struct v4l2_subdev_video_ops vimc_deb_video_ops = { >> .s_stream = vimc_deb_s_strea
Re: [PATCH] [RFC] vmscan.c: add a sysctl entry for controlling memory reclaim IO congestion_wait length
On Thu 19-09-19 15:46:11, Lin Feng wrote: > > > On 9/19/19 11:49, Matthew Wilcox wrote: > > On Thu, Sep 19, 2019 at 10:33:10AM +0800, Lin Feng wrote: > > > On 9/18/19 20:33, Michal Hocko wrote: > > > > I absolutely agree here. From you changelog it is also not clear what is > > > > the underlying problem. Both congestion_wait and wait_iff_congested > > > > should wake up early if the congestion is handled. Is this not the case? > > > > > > For now I don't know why, codes seem should work as you said, maybe I > > > need to > > > trace more of the internals. > > > But weird thing is that once I set the people-disliked-tunable iowait > > > drop down instantly, this is contradictory to the code design. > > > > Yes, this is quite strange. If setting a smaller timeout makes a > > difference, that indicates we're not waking up soon enough. I see > > two possibilities; one is that a wakeup is missing somewhere -- ie the > > conditions under which we call clear_wb_congested() are wrong. Or we > > need to wake up sooner. > > > > Umm. We have clear_wb_congested() called from exactly one spot -- > > clear_bdi_congested(). That is only called from: > > > > drivers/block/pktcdvd.c > > fs/ceph/addr.c > > fs/fuse/control.c > > fs/fuse/dev.c > > fs/nfs/write.c > > > > Jens, is something supposed to be calling clear_bdi_congested() in the > > block layer? blk_clear_congested() used to exist until October 29th > > last year. Or is something else supposed to be waking up tasks that > > are sleeping on congestion? > > > > IIUC it looks like after commit a1ce35fa49852db60fc6e268038530be533c5b15, This is something for Jens to comment on. Not waking up on congestion indeed sounds like a bug. > besides those *.c places as you mentioned above, vmscan codes will always > wait as long as 100ms and nobody wakes them up.
Yes this is true but you should realize that this path is triggered only under heavy memory reclaim cases where there is nothing to reclaim because there are too many pages already isolated and we are waiting for reclaimers to make some progress on them. It is also possible that there are simply no reclaimable pages at all and we are heading for an OOM situation. In both cases waiting a bit shouldn't be critical because this is really a cold path. It would be much better to have a mechanism to wake up earlier but this is likely to be non trivial and I am not sure it is worth the effort considering how rare this should be. -- Michal Hocko SUSE Labs
Re: possible deadlock in tower_open
On Mon, Sep 16, 2019 at 06:29:12AM -0700, syzbot wrote: > Hello, > > syzbot found the following crash on: > > HEAD commit:f0df5c1b usb-fuzzer: main usb gadget fuzzer driver > git tree: https://github.com/google/kasan.git usb-fuzzer > console output: https://syzkaller.appspot.com/x/log.txt?x=13c8d14e60 > kernel config: https://syzkaller.appspot.com/x/.config?x=5c6633fa4ed00be5 > dashboard link: https://syzkaller.appspot.com/bug?extid=66935bec147fbf68d9f8 > compiler: gcc (GCC) 9.0.0 20181231 (experimental) > > Unfortunately, I don't have any reproducer for this crash yet. > > IMPORTANT: if you fix the bug, please add the following tag to the commit: > Reported-by: syzbot+66935bec147fbf68d...@syzkaller.appspotmail.com > > == > WARNING: possible circular locking dependency detected > 5.3.0-rc7+ #0 Not tainted > -- > syz-executor.1/8155 is trying to acquire lock: > 86c1bdfc (open_disc_mutex){+.+.}, at: tower_open+0xce/0x9b0 > drivers/usb/misc/legousbtower.c:335 > > but task is already holding lock: > 0f520f73 (minor_rwsem){}, at: usb_open+0x23/0x270 > drivers/usb/core/file.c:39 > > which lock already depends on the new lock. This looks like a duplicate of https://lkml.kernel.org/r/d58eb90592add...@google.com Not sure if this is the right way to report this (quoting needed?): #syz dup: possible deadlock in usb_deregister_dev (2) Johan
Re: [PATCH] [RESEND] vmscan.c: add a sysctl entry for controlling memory reclaim IO congestion_wait length
On Thu 19-09-19 09:32:48, Lin Feng wrote: > > > On 9/18/19 20:27, Michal Hocko wrote: > > Please do not post a new version with a minor compile fixes until there > > is a general agreement on the approach. Willy had comments which really > > need to be resolved first. > > Sorry, but thanks for pointing out. > > > > > Also does this > > [...] > > > Reported-by: kbuild test robot > > really hold? Because it suggests that the problem has been spotted by > > the kbuild bot which is kinda unexpected... I suspect you have just > > added that for the minor compilation issue that you have fixed since the > > last version. > > Yes, I do know the issue is not reported by the robot, but > just followed the kbuild robot tip, this Reported-by suggested by kbuild robot > seems a little misleading, I'm not sure if it has other meanings. > 'If you fix the issue, kindly add following tag > Reported-by: kbuild test robot ' This would be normally the case for a patch which only fixes the particular issue. You can credit the bot in the changelog while documenting changes between versions. -- Michal Hocko SUSE Labs
[PATCH] x86/mm: fix return value of p[um]dp_set_access_flags
Function p[um]dp_set_access_flags is used with update_mmu_cache_p[um]d and the return value from p[um]dp_set_access_flags indicates whether it is necessary to do the cache update. From the current code logic, the related page table entry is updated only when changed && dirty. It is not necessary to update cache when the real page table entry is not changed. Signed-off-by: Wei Yang --- arch/x86/mm/pgtable.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 44816ff6411f..ba910f8ab43a 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -509,9 +509,10 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, * #PF is architecturally guaranteed to do that and in the * worst-case we'll generate a spurious fault. */ + return true; } - return changed; + return false; } int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address, @@ -529,9 +530,10 @@ int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address, * #PF is architecturally guaranteed to do that and in the * worst-case we'll generate a spurious fault. */ + return true; } - return changed; + return false; } #endif -- 2.17.1
[PATCH] riscv: resolve most warnings from sparse
Resolve most of the warnings emitted by sparse. The objective here is to keep arch/riscv as clean as possible with regards to sparse warnings, and to maintain this bar for subsequent patches. Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/entry.h | 29 arch/riscv/include/asm/head.h| 21 arch/riscv/include/asm/irq.h | 6 ++ arch/riscv/include/asm/pgtable.h | 3 +++ arch/riscv/include/asm/processor.h | 5 + arch/riscv/include/asm/ptrace.h | 2 ++ arch/riscv/include/asm/smp.h | 2 ++ arch/riscv/include/asm/switch_to.h | 1 + arch/riscv/include/asm/syscall.h | 3 +++ arch/riscv/include/asm/thread_info.h | 2 ++ arch/riscv/kernel/cpufeature.c | 1 + arch/riscv/kernel/module-sections.c | 1 + arch/riscv/kernel/process.c | 2 ++ arch/riscv/kernel/reset.c| 1 + arch/riscv/kernel/setup.c| 1 + arch/riscv/kernel/signal.c | 1 + arch/riscv/kernel/smp.c | 4 arch/riscv/kernel/smpboot.c | 2 ++ arch/riscv/kernel/stacktrace.c | 4 ++-- arch/riscv/kernel/syscall_table.c| 1 + arch/riscv/kernel/time.c | 2 +- arch/riscv/kernel/traps.c| 1 + arch/riscv/kernel/vdso.c | 3 ++- arch/riscv/mm/context.c | 1 + arch/riscv/mm/fault.c| 1 + arch/riscv/mm/init.c | 16 +++ arch/riscv/mm/sifive_l2_cache.c | 2 +- 27 files changed, 105 insertions(+), 13 deletions(-) create mode 100644 arch/riscv/include/asm/entry.h create mode 100644 arch/riscv/include/asm/head.h diff --git a/arch/riscv/include/asm/entry.h b/arch/riscv/include/asm/entry.h new file mode 100644 index ..73bfcda993d0 --- /dev/null +++ b/arch/riscv/include/asm/entry.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 SiFive, Inc. 
+ */ +#ifndef __ASM_ENTRY_H +#define __ASM_ENTRY_H + +#include +#include + +asmlinkage void do_trap_unknown(struct pt_regs *regs); +asmlinkage void do_trap_insn_misaligned(struct pt_regs *regs); +asmlinkage void do_trap_insn_fault(struct pt_regs *regs); +asmlinkage void do_trap_insn_illegal(struct pt_regs *regs); +asmlinkage void do_trap_load_misaligned(struct pt_regs *regs); +asmlinkage void do_trap_load_fault(struct pt_regs *regs); +asmlinkage void do_trap_store_misaligned(struct pt_regs *regs); +asmlinkage void do_trap_store_fault(struct pt_regs *regs); +asmlinkage void do_trap_ecall_u(struct pt_regs *regs); +asmlinkage void do_trap_ecall_s(struct pt_regs *regs); +asmlinkage void do_trap_ecall_m(struct pt_regs *regs); +asmlinkage void do_trap_break(struct pt_regs *regs); + +asmlinkage void do_notify_resume(struct pt_regs *regs, +unsigned long thread_info_flags); + +void __init trap_init(void); + +#endif /* __ASM__H */ diff --git a/arch/riscv/include/asm/head.h b/arch/riscv/include/asm/head.h new file mode 100644 index ..105fb0496b24 --- /dev/null +++ b/arch/riscv/include/asm/head.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 SiFive, Inc. 
+ */ +#ifndef __ASM_HEAD_H +#define __ASM_HEAD_H + +#include +#include + +extern atomic_t hart_lottery; + +asmlinkage void do_page_fault(struct pt_regs *regs); +asmlinkage void __init setup_vm(uintptr_t dtb_pa); + +extern void *__cpu_up_stack_pointer[]; +extern void *__cpu_up_task_pointer[]; + +void __init parse_dtb(void); + +#endif /* __ASM_HEAD_H */ diff --git a/arch/riscv/include/asm/irq.h b/arch/riscv/include/asm/irq.h index 75576424c0f7..f0e9df6e6049 100644 --- a/arch/riscv/include/asm/irq.h +++ b/arch/riscv/include/asm/irq.h @@ -7,11 +7,17 @@ #ifndef _ASM_RISCV_IRQ_H #define _ASM_RISCV_IRQ_H +#include +#include + #define NR_IRQS 0 void riscv_timer_interrupt(void); void riscv_software_interrupt(void); +asmlinkage void do_IRQ(struct pt_regs *regs); +void __init init_IRQ(void); + #include #endif /* _ASM_RISCV_IRQ_H */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 4f4162d90586..1be4f70ab266 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -61,6 +61,9 @@ #define PAGE_TABLE __pgprot(_PAGE_TABLE) +extern pgd_t swapper_pg_dir[]; +extern pgd_t trampoline_pg_dir[]; +extern pgd_t early_pg_dir[]; extern pgd_t swapper_pg_dir[]; /* MAP_PRIVATE permissions: xwr (copy-on-write) */ diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index f539149d04c2..f4fb93a2f282 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -9,6 +9,7 @@ #include #include +#include /* * This decides where the kernel will search for a free chunk of vm @@ -78,6 +79,10 @@ int riscv_of_processor_hartid(struct device_node *node); extern void riscv_fill_hwcap(void); +extern const st
Re: [PATCH v7 00/13] Device property improvements, add %pfw format specifier
On Thu, Sep 19, 2019 at 10:00:08AM +0200, Rafael J. Wysocki wrote: > On Wed, Sep 18, 2019 at 3:34 PM Sakari Ailus > wrote: > > > > Hi all, > > > > This set adds functionality into the device property API (counting a > > node's parents as well as obtaining its name) in order to support printing > > fwnode names using a new conversion specifier "%pfw". The names that are > > produced are equivalent to its OF counterpart "%pOF" on OF systems for the > > two supported modifiers ("f" and "P"). > > > > Printing a node's name is something that's been available on OF for a long > > time and if something is converted to device property API (such as the > > V4L2 fwnode framework) it always got removed of a nice feature that was > > sometimes essential in debugging. With this set, that no longer is the > > case. > > > > Note: the set now depends on 2d44d165e939 ("scsi: lpfc: Convert existing > > %pf users to %ps") that is expected from the linux-scsi tree. It is > > OK, so I will be expecting a refresh of the series when the above > commit appears in the mainline. Works for me. I'll send v8 then, with Depends-on removed. -- Sakari Ailus sakari.ai...@linux.intel.com
Re: [PATCH 1/1] sched/eas: introduce system-wide overutil indicator
Hi, Could you please CC me on later versions of this ? I'm interested. On Thursday 19 Sep 2019 at 15:20:22 (+0800), YT Chang wrote: > When the system is overutilization, the load-balance crossing > clusters will be triggered and scheduler will not use energy > aware scheduling to choose CPUs. > > The overutilization means the loading of ANY CPUs > exceeds threshold (80%). > > However, only 1 heavy task or while-1 program will run on highest > capacity CPUs and it still result to trigger overutilization. So > the system will not use Energy Aware scheduling. > > To avoid it, a system-wide over-utilization indicator to trigger > load-balance cross clusters. > > The policy is: > The loading of "ALL CPUs in the highest capacity" > exceeds threshold(80%) or > The loading of "Any CPUs not in the highest capacity" > exceed threshold(80%) > > Signed-off-by: YT Chang Right, so we originally went for the simpler implementation because in general when you have the biggest CPUs of the system running flat out at max freq, the micro-optimizations for energy on littles don't matter all that much. Is there a use-case where you see a big difference ? A second thing is RT pressure. If a big CPU is used at 50% by a CFS task and 50% by RT, we should mark it overutilized. Otherwise EAS will think the CFS task is 50% and try to down-migrate it. But the truth is, we dont know the size of the task ... So, I believe your patch breaks that ATM. And there is a similar problem with misfit. That is, a task running flat out on a big CPU will be flagged as misfit, even if there is nothing we can do about (we can't up-migrate it for obvious reasons). So perhaps we should look at a common solution for both issues, if deemed useful. 
> --- > kernel/sched/fair.c | 76 > + > 1 file changed, 65 insertions(+), 11 deletions(-) > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > index 036be95..f4c3d70 100644 > --- a/kernel/sched/fair.c > +++ b/kernel/sched/fair.c > @@ -5182,10 +5182,71 @@ static inline bool cpu_overutilized(int cpu) > static inline void update_overutilized_status(struct rq *rq) > { > if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu)) { > - WRITE_ONCE(rq->rd->overutilized, SG_OVERUTILIZED); > - trace_sched_overutilized_tp(rq->rd, SG_OVERUTILIZED); > + if (capacity_orig_of(cpu_of(rq)) < rq->rd->max_cpu_capacity) { > + WRITE_ONCE(rq->rd->overutilized, SG_OVERUTILIZED); > + trace_sched_overutilized_tp(rq->rd, SG_OVERUTILIZED); > + } > } > } > + > +static > +void update_system_overutilized(struct sched_domain *sd, struct cpumask > *cpus) > +{ > + unsigned long group_util; > + bool intra_overutil = false; > + unsigned long max_capacity; > + struct sched_group *group = sd->groups; > + struct root_domain *rd; > + int this_cpu; > + bool overutilized; > + int i; > + > + this_cpu = smp_processor_id(); > + rd = cpu_rq(this_cpu)->rd; > + overutilized = READ_ONCE(rd->overutilized); > + max_capacity = rd->max_cpu_capacity; > + > + do { > + group_util = 0; > + for_each_cpu_and(i, sched_group_span(group), cpus) { > + group_util += cpu_util(i); > + if (cpu_overutilized(i)) { > + if (capacity_orig_of(i) < max_capacity) { This is what breaks things with RT pressure I think. > + intra_overutil = true; > + break; > + } > + } > + } > + > + /* > + * A capacity base hint for over-utilization. 
> + * Not to trigger system overutiled if heavy tasks > + * in Big.cluster, so > + * add the free room(20%) of Big.cluster is impacted which means > + * system-wide over-utilization, > + * that considers whole cluster not single cpu > + */ > + if (group->group_weight > 1 && (group->sgc->capacity * 1024 < > + group_util * capacity_margin)) { > + intra_overutil = true; > + break; > + } What if we have only one big MC domain with both big and little CPUs and no DIE ? Say you have 4 big tasks, 4 big CPUs, 4 little CPUs (idle). You'll fail to mark the system overutilized no ? > + > + group = group->next; > + > + } while (group != sd->groups && !intra_overutil); > + > + if (overutilized != intra_overutil) { > + if (intra_overutil == true) { > + WRITE_ONCE(rd->overutilized, SG_OVERUTILIZED); > + trace_sched_overutilized_tp(rd,
RE: [PATCH v2 2/2] reset: Reset controller driver for Intel LGM SoC
Hi Dilip, > -Original Message- > From: devicetree-ow...@vger.kernel.org ow...@vger.kernel.org> On Behalf Of Dilip Kota > Sent: Donnerstag, 19. September 2019 10:06 > To: Martin Blumenstingl > Cc: Chuan Hua, Lei ; Kim, Cheol Yong > ; devicet...@vger.kernel.org; linux- > ker...@vger.kernel.org; p.za...@pengutronix.de; Wu, Qiming ming...@intel.com>; r...@kernel.org; Hauke Mehrtens > Subject: Re: [PATCH v2 2/2] reset: Reset controller driver for Intel LGM > SoC > > Hi Martin, > > On 9/12/2019 2:38 PM, Dilip Kota wrote: > > Re-sending the mail, because of delivery failure. > > sorry for the spam. > > > > Hi Martin, > > > > On 9/6/2019 4:53 AM, Martin Blumenstingl wrote: > >> Hi, > >> > >> On Thu, Sep 5, 2019 at 4:38 AM Chuan Hua, Lei > >> wrote: > >> [...] > >> I'm not surprised that we got some of the IP block layout for > >> the > >> VRX200 RCU "wrong" - all "documentation" we have is the old > >> Lantiq UGW > >> (BSP). > >> with proper documentation (as in a "public datasheet for the > >> SoC") it > >> would be easy to spot these mistakes (at least I assume that > the > >> quality of the Infineon / Lantiq datasheets is excellent). > >> > >> back to reset-intel-syscon: > >> assigning only one job to the RCU hardware is a good idea (in > >> my opinion). > >> that brings up a question: why do we need the "syscon" > >> compatible for > >> the RCU node? > >> this is typically used when registers are accessed by another > >> IP block > >> and the other driver has to access these registers as well. > >> does this > >> mean that there's more hidden in the RCU registers? > > As I mentioned, some other misc registers are put into RCU > > even they > > don't belong to reset functions. > OK, just be aware that there are also rules for syscon > compatible > drivers, see for example: [0] > if Rob (dt-bindings maintainer) is happy with the documentation > in > patch 1 then I'm fine with it as well. 
> for my own education I would appreciate if you could describe > these > "other misc registers" with a few sentences (I assume that this > can > also help Rob) > >>> For LGM, RCU is clean. There would be no MISC register after > >>> software's > >>> feedback. These misc registers will be moved to chiptop/misc > >>> groups(implemented by syscon). For legacy SoC, we do have a lot > >>> MISC > >>> registers for different SoCs. > >> OK, I think I understand now: chiptop != RCU > >> so RCU really only has one purpose: handling resets > >> while chiptop manages all the random bits > >> > >> does this means we don't need RCU to match "syscon"? > > If we don't support legacy SoC with the same driver, we don't need > > syscon, just regmap. Regmap is a must for us since we will use > regmap > > proxy to implement secure rest via secure processor. > I think we should drop the syscon compatible for LGM then > even for the legacy SoCs the reset controller should not have a > syscon > compatible: instead it should have a syscon parent (as the current > "lantiq,xrx200-reset" binding requires and as suggested by Rob for > another IP block: [0]) > >>> I am not sure if syscon parent really matches hardware > implementation. > >>> In all our Networking SoCs, chiptop is kind of misc register > >>> collection. > >>> Some registers can't belong to any particular group, or they need to > >>> work together with other modules(therefore, these misc registers > would > >>> be accessed by two or more modules). However, chiptop is not a > hardware > >>> module. > >> indeed, chiptop should not have any child nodes (based on your > >> explanation). > >> I was referring to VRX200 where the RCU syscon has various children > >> (one child node for each hardware module that's part of RCU: reset > >> controller, 2x USB PHY, ...) > >> > >> back to LGM: > >> you said that the LGM RCU registers only contain the reset > controller. 
> >> thus I see no need for the syscon compatible > >> > keeping regmap is great in my opinion because it's a nice API and > gets > rid of some boilerplate > even better if it makes things easier for accessing the secure > processor > > [...] > >>> 4. Code not optimized and intel internal review not > >>> assessed. > >> insights from you (like the issue with the reset > >> callback) are very > >> valuable - this shows that we should focus on having one > >> driver. > >> > >>> Based on the above findings, I would suggest > >>> reset-lantiq.c to move to > >>> reset-intel-syscon.c > >> my concern with having two sepa
Re: [PATCH v1 1/2] pinctrl: Add pinmux & GPIO controller driver for new SoC
Hi Andy, Thanks for your comments. I agree & will address all your review concerns in v2 except below mentioned points where i need more clarification. On 12/9/2019 10:30 PM, Andy Shevchenko wrote: >> +static const struct pin_config pin_cfg_type[] = { >> +{"intel,pullup",PINCONF_TYPE_PULL_UP}, >> +{"intel,pulldown", PINCONF_TYPE_PULL_DOWN}, >> +{"intel,drive-current", PINCONF_TYPE_DRIVE_CURRENT}, >> +{"intel,slew-rate", PINCONF_TYPE_SLEW_RATE}, >> +{"intel,open-drain",PINCONF_TYPE_OPEN_DRAIN}, >> +{"intel,output",PINCONF_TYPE_OUTPUT}, >> +}; > Doesn't DT provide a generic naming scheme for these? For pinctrl multiplexing/configuration nodes, DT does provide generic names for some properties but it does not seem to mandate it. It states that the content of pin mux/conf nodes is defined entirely by the binding of the pin controller device. There are many other examples of pinctrl drivers which use different prop names than generic ones. For e.g. Samsung. Our understanding is: if the node is generic i.e. handled by framework, then it should use generic name. But if the node is private to driver, then it is better to prefix it with driver name to avoid any conflicts. >> +virq = irq_find_mapping(desc->irq_domain, offset); >> +if (virq) >> +return virq; >> >> +else >> +return irq_create_mapping(desc->irq_domain, offset); > Don't we have more clever helper for this? AFAIR something like this is done > in > IRQ framework when you get a mapping from certain domain. > I guess, you mean irq_domain_add_simple(). This function does optionally map the IRQs but only statically assigned IRQs. We need dynamic gpio_to_irq mappings which is why the gpio_chip->to_irq() is optionally provided so the drivers requiring dynamic IRQ mappings can override this function. But i can definitely get rid of redundant irq_find_mapping() because irq_create_mapping() anyways invokes irq_find_mapping() first. I will remove irq_find_mapping() and just use irq_create_mapping() here. 
>> +static void eqbr_irq_handler(struct irq_desc *desc) >> +{ >> +struct intel_gpio_desc *gc; >> +struct irq_chip *ic; >> +u32 pins, offset; >> +unsigned int virq; >> + >> +gc = irq_desc_get_handler_data(desc); >> +ic = irq_desc_get_chip(desc); >> + >> +chained_irq_enter(ic, desc); >> +pins = readl(gc->membase + GPIO_IRNCR); >> + >> +for_each_set_bit(offset, (unsigned long *)&pins, gc->bank->nr_pins) { >> +virq = irq_linear_revmap(gc->irq_domain, offset); >> +if (!virq) >> +pr_err("gc[%s]:pin:%d irq not registered!\n", >> + gc->name, offset); > dev_err() ? But Why is it needed? Shouldn't be registered as a spurious IRQ > for > later debugging? IMHO, spurious IRQ can only be registered if none of the pins are valid. Please see below alternative way of handling it & let me know if this makes more sense. @@ -313,6 +313,7 @@ static void eqbr_irq_handler(struct irq_desc *desc) struct irq_chip *ic; u32 pins, offset; unsigned int virq; + int handled = 0; gc = irq_desc_get_handler_data(desc); ic = irq_desc_get_chip(desc); @@ -322,12 +323,16 @@ static void eqbr_irq_handler(struct irq_desc *desc) for_each_set_bit(offset, (unsigned long *)&pins, gc->bank->nr_pins) { virq = irq_linear_revmap(gc->irq_domain, offset); - if (!virq) - pr_err("gc[%s]:pin:%d irq not registered!\n", - gc->name, offset); - else + if (virq) { generic_handle_irq(virq); + handled++; + } } + + /* Spurious interrupt */ + if (handled == 0) + handle_bad_irq(desc); + chained_irq_exit(ic, desc); } >> +static int add_config(struct intel_pinctrl_drv_data *drvdata, >> + unsigned long **confs, unsigned int *nr_conf, >> + unsigned long pinconf) >> +{ >> +unsigned long *configs; >> +struct device *dev = drvdata->dev; >> +unsigned int num_conf = *nr_conf + 1; >> + >> +if (!(*nr_conf)) { >> +configs = devm_kcalloc(dev, 1, sizeof(pinconf), GFP_KERNEL); >> +if (!configs) >> +return -ENOMEM; >> +} else { >> +configs = devm_kmemdup(dev, *confs, >> + num_conf * sizeof(pinconf), GFP_KERNEL); >> +if (!configs) >> +return 
-ENOMEM; >> +devm_kfree(dev, *confs); > This a red flag for using devm_*(). > Either a sign of bad design or misplacement of devm_*(). I can switch to non devm versions i.e. kmalloc/kfree for these buffer allocations. But this leaves me with a fundamental question. As i understand, devm*() variants are to all
Re: [PATCH v2 2/2] EDAC: al-mc-edac: Introduce Amazon's Annapurna Labs Memory Controller EDAC
Thanks for the review. On 9/18/2019 8:47 PM, James Morse wrote: Hi Talel, On 15/09/2019 07:43, Talel Shenhar wrote: The Amazon's Annapurna Labs Memory Controller EDAC supports ECC capability for error detection and correction (Single bit error correction, Double detection). This driver introduces EDAC driver for that capability. Is there any documentation for this memory controller? Unfortunately, we don't have public documentation for it. diff --git a/drivers/edac/al_mc_edac.c b/drivers/edac/al_mc_edac.c new file mode 100644 index 000..f9763d4 --- /dev/null +++ b/drivers/edac/al_mc_edac.c @@ -0,0 +1,382 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. + */ +#include #include for hweight_long() shall be part of v3. btw: do you use some tool to catch those missing includes? +#include +#include #include for platform_get_resource() shall be part of v3. +#include "edac_module.h" +/* Registers Values */ +#define AL_MC_MSTR_DEV_CFG_X4 0 +#define AL_MC_MSTR_DEV_CFG_X8 1 +#define AL_MC_MSTR_DEV_CFG_X16 2 +#define AL_MC_MSTR_DEV_CFG_X32 3 +#define AL_MC_MSTR_RANKS_MAX 4 Is this a fixed property of the memory controller, or is it a limit imposed from somewhere else. (Does it need to come from the DT?) Yes. this is a fixed behavior hence not part of dt. +#define AL_MC_MSTR_DATA_BUS_WIDTH_X64 0 + +#define DRV_NAME "al_mc_edac" +#define AL_MC_EDAC_MSG_MAX 256 +#define AL_MC_EDAC_MSG(message, buffer_size, type, \ + rank, row, bg, bank, column, syn0, syn1, syn2) \ + snprintf(message, buffer_size, \ +"%s rank=0x%x row=0x%x bg=0x%x bank=0x%x col=0x%x " \ +"syn0: 0x%x syn1: 0x%x syn2: 0x%x", \ +type == HW_EVENT_ERR_UNCORRECTED ? 
"UE" : "CE",\ +rank, row, bg, bank, column, syn0, syn1, syn2) + +struct al_mc_edac { + void __iomem *mmio_base; + int irq_ce; + int irq_ue; +}; + +static int al_mc_edac_handle_ce(struct mem_ctl_info *mci) +{ + struct al_mc_edac *al_mc = mci->pvt_info; + u32 eccerrcnt; + u16 ce_count; + u32 ecccaddr0; + u32 ecccaddr1; + u32 ecccsyn0; + u32 ecccsyn1; + u32 ecccsyn2; + u8 rank; + u32 row; + u8 bg; + u8 bank; + u16 column; + char msg[AL_MC_EDAC_MSG_MAX]; (Some of these could go on the same line, same with UE below) Shall be part of v3 + + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, +ce_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg); You used active_ranks as the layer size in al_mc_edac_probe(). Can't you supply the rank here? (If its not useful, why is it setup like this in al_mc_edac_probe()?) Seems it can be removed from probe. Shall be part of v3. + u8 bank; + u16 column; + char msg[AL_MC_EDAC_MSG_MAX]; + + eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT); + ue_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_UE, eccerrcnt); + if (!ue_count) + return 0; + + eccuaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR0); + eccuaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR1); + eccusyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND0); + eccusyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND1); + eccusyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND2); + + writel(AL_MC_ECC_CLEAR_UE_COUNT | AL_MC_ECC_CLEAR_UE_ERR, + al_mc->mmio_base + AL_MC_ECC_CLEAR); + + dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n", + eccuaddr0, eccuaddr1); + + rank = FIELD_GET(AL_MC_ECC_UE_ADDR0_RANK, eccuaddr0); + row = FIELD_GET(AL_MC_ECC_UE_ADDR0_ROW, eccuaddr0); + + bg = FIELD_GET(AL_MC_ECC_UE_ADDR1_BG, eccuaddr1); + bank = FIELD_GET(AL_MC_ECC_UE_ADDR1_BANK, eccuaddr1); + column = FIELD_GET(AL_MC_ECC_UE_ADDR1_COLUMN, eccuaddr1); + + AL_MC_EDAC_MSG(msg, sizeof(msg), HW_EVENT_ERR_UNCORRECTED, + rank, row, bg, bank, column, + eccusyn0, 
eccusyn1, eccusyn2); + + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, +ue_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg); What happens when this code runs at the same time as the corrected error handler calling edac_mc_handler_error() with this same mci? This could happen on a second CPU, or on one cpu if the corrected handler is polled. edac_mc_handle_error() memset's the edac_raw_error_desc in mci, so it can't be called in parallel, or twice on the same cpu. I think you need an irqsave spinlock around the calls to edac_mc_handle_error(). shall add locks in v3. + +static irqreturn_t al_mc_edac_irq_handler_ue(int irq, void *info) +{ + struct platform_device *pdev = in
Re: Usecases for the per-task latency-nice attribute
On 9/18/19 10:46 PM, Tim Chen wrote: > On 9/18/19 5:41 AM, Parth Shah wrote: >> Hello everyone, >> >> As per the discussion in LPC2019, new per-task property like latency-nice >> can be useful in certain scenarios. The scheduler can take proper decision >> by knowing latency requirement of a task from the end-user itself. >> >> There has already been an effort from Subhra for introducing Task >> latency-nice [1] values and have seen several possibilities where this type >> of >> interface can be used. >> >> From the best of my understanding of the discussion on the mail thread and >> in the LPC2019, it seems that there are two dilemmas; > > Thanks for starting the discussion. > > >> >> --- >> **Usecases** >> --- >> >> $> TurboSched >> >> TurboSched [2] tries to minimize the number of active cores in a socket by >> packing an un-important and low-utilization (named jitter) task on an >> already active core and thus refrains from waking up of a new core if >> possible. This requires tagging of tasks from the userspace hinting which >> tasks are un-important and thus waking-up a new core to minimize the >> latency is un-necessary for such tasks. >> As per the discussion on the posted RFC, it will be appropriate to use the >> task latency property where a task with the highest latency-nice value can >> be packed. >> But for this specific use-cases, having just a binary value to know which >> task is latency-sensitive and which not is sufficient enough, but having a >> range is also a good way to go where above some threshold the task can be >> packed. >> >> > > $> Separating AVX512 tasks and latency sensitive tasks on separate cores > - > Another usecase we are considering is to segregate those workload that will > pull down > core cpu frequency (e.g. AVX512) from workload that are latency sensitive. 
> There are certain tasks that need to provide a fast response time (latency > sensitive) > and they are best scheduled on cpu that has a lighter load and not have other > tasks running on the sibling cpu that could pull down the cpu core frequency. > > Some users are running machine learning batch tasks with AVX512, and have > observed > that these tasks affect the tasks needing a fast response. They have to > rely on manual CPU affinity to separate these tasks. With appropriate > latency hint on task, the scheduler can be taught to separate them. > Thanks for listing out your usecase. This is interesting. If the scheduler has knowledge of AVX512 tasks, then with this interface the scheduler can refrain from picking cores occupied by AVX512 tasks for a task with "latency-nice = -19". So I guess for this specific use-case, the value of such a per-task attribute should have a range (most probably [-19,20]) and the name "latency-nice" also suits the need. Do you have any specific values in mind for such attr? Thanks, Parth
Re: [PATCH 2/5] powerpc: Map & release OpenCAPI LPC memory
Le 19/09/2019 à 02:58, Alastair D'Silva a écrit : On Wed, 2019-09-18 at 16:03 +0200, Frederic Barrat wrote: Le 17/09/2019 à 03:42, Alastair D'Silva a écrit : From: Alastair D'Silva Map & release OpenCAPI LPC memory. Signed-off-by: Alastair D'Silva --- arch/powerpc/include/asm/pnv-ocxl.h | 2 ++ arch/powerpc/platforms/powernv/ocxl.c | 42 +++ 2 files changed, 44 insertions(+) diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h index 7de82647e761..f8f8ffb48aa8 100644 --- a/arch/powerpc/include/asm/pnv-ocxl.h +++ b/arch/powerpc/include/asm/pnv-ocxl.h @@ -32,5 +32,7 @@ extern int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle) extern int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr); extern void pnv_ocxl_free_xive_irq(u32 irq); +extern u64 pnv_ocxl_platform_lpc_setup(struct pci_dev *pdev, u64 size); +extern void pnv_ocxl_platform_lpc_release(struct pci_dev *pdev); #endif /* _ASM_PNV_OCXL_H */ diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c index 8c65aacda9c8..81393728d6a3 100644 --- a/arch/powerpc/platforms/powernv/ocxl.c +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -475,6 +475,48 @@ void pnv_ocxl_spa_release(void *platform_data) } EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release); +u64 pnv_ocxl_platform_lpc_setup(struct pci_dev *pdev, u64 size) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; + struct pci_dn *pdn = pci_get_pdn(pdev); + u32 bdfn = (pdn->busno << 8) | pdn->devfn; We can spare a call to pci_get_pdn() with bdfn = (pdev->bus->number << 8) | pdev->devfn; Ok. + u64 base_addr = 0; + + int rc = opal_npu_mem_alloc(phb->opal_id, bdfn, size, &base_addr); + + WARN_ON(rc); Instead of a WARN, we should catch the error and return a null address to the caller. base_addr will be 0 in the error case, are you suggesting we just remove the WARN_ON()? 
Well, we don't really have any reason to keep going if the opal call fails, right? And anyway, I wouldn't make any assumption on the content of base_addr if the call fails. But my remark was really to avoid polluting the logs with the WARN output. The stack backtrace and register content is scary and is not going to help in that situation. A proper error message is more suitable. Fred + + base_addr = be64_to_cpu(base_addr); + + rc = check_hotplug_memory_addressable(base_addr, base_addr + size); That code is missing? That's added in the following patch on the mm list: [PATCH v3 1/2] memory_hotplug: Add a bounds check to check_hotplug_memory_range() + if (rc) { + dev_warn(&pdev->dev, +"LPC memory range 0x%llx-0x%llx is not fully addressable", +base_addr, base_addr + size - 1); + return 0; + } + + + return base_addr; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_platform_lpc_setup); + +void pnv_ocxl_platform_lpc_release(struct pci_dev *pdev) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; + struct pci_dn *pdn = pci_get_pdn(pdev); + u32 bdfn; + int rc; + + bdfn = (pdn->busno << 8) | pdn->devfn; + rc = opal_npu_mem_release(phb->opal_id, bdfn); + WARN_ON(rc); Same comments as above. Fred +} +EXPORT_SYMBOL_GPL(pnv_ocxl_platform_lpc_release); + + int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle) { struct spa_data *data = (struct spa_data *) platform_data;
Re: [Patch V2] soc/tegra: fuse: Add fuse clock check in tegra_fuse_readl
On Tue, Sep 03, 2019 at 04:26:52PM +0530, Nagarjuna Kristam wrote: > tegra_fuse_readl() can be called from drivers at any time. If this API is > called before tegra_fuse_probe(), we end up enabling clock before it is > registered. Add check for fuse clock in tegra_fuse_readl() and return > corresponding error if any. > > Signed-off-by: Nagarjuna Kristam > --- > V2: > - Added Null and other error checks for fuse->clk. > --- > drivers/soc/tegra/fuse/fuse-tegra.c | 5 - > 1 file changed, 4 insertions(+), 1 deletion(-) Hi ARM-SoC maintainers, can you pick this up as a bugfix for v5.4-rc1? Would you prefer to pick it up directly or do you want a pull request for this? The patchwork link is: http://patchwork.ozlabs.org/patch/1156928/ And in that case: Acked-by: Thierry Reding Thanks, Thierry > diff --git a/drivers/soc/tegra/fuse/fuse-tegra.c > b/drivers/soc/tegra/fuse/fuse-tegra.c > index 3eb44e6..58996c6 100644 > --- a/drivers/soc/tegra/fuse/fuse-tegra.c > +++ b/drivers/soc/tegra/fuse/fuse-tegra.c > @@ -186,9 +186,12 @@ u32 __init tegra_fuse_read_early(unsigned int offset) > > int tegra_fuse_readl(unsigned long offset, u32 *value) > { > - if (!fuse->read) > + if (!fuse->read || !fuse->clk) > return -EPROBE_DEFER; > > + if (IS_ERR(fuse->clk)) > + return PTR_ERR(fuse->clk); > + > *value = fuse->read(fuse, offset); > > return 0; > -- > 2.7.4 > signature.asc Description: PGP signature
Re: [PATCH 3/5] ocxl: Tally up the LPC memory on a link & allow it to be mapped
Le 19/09/2019 à 06:55, Alastair D'Silva a écrit : On Wed, 2019-09-18 at 16:02 +0200, Frederic Barrat wrote: Le 17/09/2019 à 03:42, Alastair D'Silva a écrit : From: Alastair D'Silva Tally up the LPC memory on an OpenCAPI link & allow it to be mapped Signed-off-by: Alastair D'Silva --- drivers/misc/ocxl/core.c | 9 + drivers/misc/ocxl/link.c | 61 +++ drivers/misc/ocxl/ocxl_internal.h | 42 + 3 files changed, 112 insertions(+) diff --git a/drivers/misc/ocxl/core.c b/drivers/misc/ocxl/core.c index b7a09b21ab36..fdfe4e0a34e1 100644 --- a/drivers/misc/ocxl/core.c +++ b/drivers/misc/ocxl/core.c @@ -230,8 +230,17 @@ static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct pci_dev *dev) if (rc) goto err_free_pasid; + if (afu->config.lpc_mem_size || afu- config.special_purpose_mem_size) { + rc = ocxl_link_add_lpc_mem(afu->fn->link, + afu->config.lpc_mem_size + afu- config.special_purpose_mem_size); I don't think we should count the special purpose memory, as it's not meant to be accessed through the GPU mem BAR, but I'll check. At least for OpenCAPI 3.0, there is no other in-spec way to access the memory if it is not mapped by the NPU. Yes, that's clarified now and we should take the special purpose memory into account when defining the full range. Fred What happens when unconfiguring the AFU? We should reduce the range (see also below). Partial reconfig doesn't seem so far off, so we should take it into account. The mapping is left until the last AFU on the link offlines it's memory, at which point we clear the mapping from the NPU. 
+ if (rc) + goto err_free_mmio; + } + return 0; +err_free_mmio: + unmap_mmio_areas(afu); err_free_pasid: reclaim_afu_pasid(afu); err_free_actag: diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c index 58d111afd9f6..2874811a4398 100644 --- a/drivers/misc/ocxl/link.c +++ b/drivers/misc/ocxl/link.c @@ -84,6 +84,11 @@ struct ocxl_link { int dev; atomic_t irq_available; struct spa *spa; + struct mutex lpc_mem_lock; + u64 lpc_mem_sz; /* Total amount of LPC memory presented on the link */ + u64 lpc_mem; + int lpc_consumers; + void *platform_data; }; static struct list_head links_list = LIST_HEAD_INIT(links_list); @@ -396,6 +401,8 @@ static int alloc_link(struct pci_dev *dev, int PE_mask, struct ocxl_link **out_l if (rc) goto err_spa; + mutex_init(&link->lpc_mem_lock); + /* platform specific hook */ rc = pnv_ocxl_spa_setup(dev, link->spa->spa_mem, PE_mask, &link->platform_data); @@ -711,3 +718,57 @@ void ocxl_link_free_irq(void *link_handle, int hw_irq) atomic_inc(&link->irq_available); } EXPORT_SYMBOL_GPL(ocxl_link_free_irq); + +int ocxl_link_add_lpc_mem(void *link_handle, u64 size) +{ + struct ocxl_link *link = (struct ocxl_link *) link_handle; + + u64 orig_size; + bool good = false; + + mutex_lock(&link->lpc_mem_lock); + orig_size = link->lpc_mem_sz; + link->lpc_mem_sz += size; We have a choice to make here: 1. either we only support one LPC memory-carrying AFU (and the above is overkill) 2. or we support multiple AFUs with LPC memory (on the same function), but then I think the above is too simple. From the opencapi spec, each AFU can define a chunk of memory with a starting address and a size. There's no rule which says they have to be contiguous. There's no rule which says it must start at 0. So to support multiple AFUs with LPC memory, we should record the current maximum range instead of just the global size. Ultimately, we need to tell the NPU the range of permissible addresses. 
It starts at 0, so we need to take into account any intial offset and holes. I would go for option 2, to at least be consistent within ocxl and support multiple AFUs. Even though I don't think we'll see FPGA images with multiple AFUs with LPC memory any time soon. Ill rework this to take an offset & size, the NPU will map from the base address up to the largest offset + size provided across all AFUs on the link. + good = orig_size < link->lpc_mem_sz; + mutex_unlock(&link->lpc_mem_lock); + + // Check for overflow + return (good) ? 0 : -EINVAL; +} +EXPORT_SYMBOL_GPL(ocxl_link_add_lpc_mem); Do the symbol really need to be exported? IIUC, the next patch defines a higher level ocxl_afu_map_lpc_mem() which is meant to be called by a calling driver. No, I'll remove it. + +u64 ocxl_link_lpc_map(void *link_handle, struct pci_dev *pdev) +{ + struct ocxl_link *link = (struct ocxl_link *) link_handle; + + mutex_lock(&link->lpc_mem_lock); + if (link->lpc_mem) { + u64 lpc_mem = link->
Re: [PATCH v2 1/3] dt-bindings: dmaengine: dma-common: Change dma-channel-mask to uint32-array
On 18/09/2019 17.21, Rob Herring wrote: + - description: Mask of channels X-(X+31) >>> >>> Obviously, this was not validated with 'make dt_binding_check'. >> make dt_bindings_check >> make: *** No rule to make target 'dt_bindings_check'. Stop. > > Read Documentation/devicetree/writing-schema.md (or .rst in next). > > Either your config doesn't have DTC enabled or you don't have > dt-schema installed. I have reinstalled dt-schema and added $HOME/.local/bin to PATH and now 'make dt_binding_check' is working and passing for dma-common.yaml. For some reason it did not validate the new dma-domain.yaml from another series, I guess it needs to be added to some list? - Péter Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki. Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
I am going to sue you for your ink pad replacement racket.
Hi, I'm a licensed attorney, I am going to sue you for this bullshit: https://epson.com/Support/wa00369 The Maintenance Reset Utility was already used on your product. The Maintenance Reset Utility cannot be used again in order to ensure the proper operation of our devices to It is not your device, it is mine. I own the title to said personal property. You have engaged in a conspiracy to deprive me of the use of said property. I will be filing suit soon. Go and fuck yourselves.
Re: [GIT] Networking
From: Linus Torvalds Date: Wed, 18 Sep 2019 13:37:57 -0700 > I've obviously already pulled this (and only noticed when I was > testing further on my laptop), but please explain or fix. I'll take a look, thanks.
RE: [PATCH] scsi: qedf: Remove always false 'tmp_prio < 0' statement
> -Original Message- > From: linux-scsi-ow...@vger.kernel.org On > Behalf Of Austin Kim > Sent: Thursday, September 19, 2019 1:26 PM > To: j...@linux.ibm.com; martin.peter...@oracle.com > Cc: linux-s...@vger.kernel.org; linux-kernel@vger.kernel.org; QLogic-Storage- > upstr...@cavium.com; austindh@gmail.com > Subject: [PATCH] scsi: qedf: Remove always false 'tmp_prio < 0' statement > > Since tmp_prio is declared as u8, the following statement is always false. >tmp_prio < 0 > > So remove 'always false' statement. > > Signed-off-by: Austin Kim > --- > drivers/scsi/qedf/qedf_main.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c > index > 1659d35..59ca98f 100644 > --- a/drivers/scsi/qedf/qedf_main.c > +++ b/drivers/scsi/qedf/qedf_main.c > @@ -596,7 +596,7 @@ static void qedf_dcbx_handler(void *dev, struct > qed_dcbx_get *get, u32 mib_type) > tmp_prio = get->operational.app_prio.fcoe; > if (qedf_default_prio > -1) > qedf->prio = qedf_default_prio; > - else if (tmp_prio < 0 || tmp_prio > 7) { > + else if (tmp_prio > 7) { > QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, > "FIP/FCoE prio %d out of range, setting to %d.\n", > tmp_prio, QEDF_DEFAULT_PRIO); > -- > 2.6.2 Hi Austin, Thanks for the patch. Acked-by: Saurav Kashyap
I am going to sue you for your ink pad replacement racket.
Dear ipru_feedb...@ea.epson.com; Hi, I'm a licensed attorney, I am going to sue you for this bullshit: https://epson.com/Support/wa00369 The Maintenance Reset Utility was already used on your product. The Maintenance Reset Utility cannot be used again in order to ensure the proper operation of our devices to It is not your device, it is mine. I own the title to said personal property. You have engaged in a conspiracy to deprive me of the use of said property. I will be filing suit soon. Go and fuck yourselves.
Question on priorities with CAN
Hey, I work on a BeagleBone-like board using the AM335x CAN chip, with an RT kernel. The CAN IRQ task has RT priority 50 by default. I have a process running at RT priority 10 using a CAN_RAW socket. I see that softirqd runs with regular timeslicing. Do CAN frames need to pass through softirqd in order to arrive at my process? If so, would I also need to elevate softirqd's RT priority? Kind regards, Kurt
Linux 4.14.145
I'm announcing the release of the 4.14.145 kernel. All users of the 4.14 kernel series must upgrade. The updated 4.14.y git tree can be found at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-4.14.y and can be browsed at the normal kernel.org git web browser: https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary thanks, greg k-h Makefile |2 arch/mips/Kconfig |3 - arch/mips/include/asm/smp.h| 12 + arch/mips/sibyte/common/Makefile |1 arch/mips/sibyte/common/dma.c | 14 -- arch/mips/vdso/Makefile|4 + arch/powerpc/include/asm/uaccess.h |1 arch/s390/kvm/interrupt.c | 10 arch/s390/kvm/kvm-s390.c |2 arch/x86/Makefile |1 arch/x86/kvm/vmx.c |7 ++- arch/x86/kvm/x86.c |7 +++ drivers/base/core.c| 53 +- drivers/bluetooth/btusb.c |5 -- drivers/clk/rockchip/clk-mmc-phase.c |4 - drivers/crypto/talitos.c | 67 - drivers/firmware/ti_sci.c |8 +-- drivers/gpio/gpiolib-acpi.c| 42 ++-- drivers/gpio/gpiolib.c | 20 ++--- drivers/gpu/drm/mediatek/mtk_drm_drv.c |5 +- drivers/gpu/drm/meson/meson_plane.c| 16 +++ drivers/isdn/capi/capi.c | 10 drivers/mtd/nand/mtk_nand.c| 21 -- drivers/net/phy/phylink.c |6 +- drivers/net/tun.c | 16 +-- drivers/net/usb/cdc_ether.c| 13 -- drivers/nvmem/core.c | 15 +-- drivers/pci/pci-driver.c |3 - drivers/platform/x86/pmc_atom.c|8 +++ fs/btrfs/compression.c | 31 +++ fs/btrfs/compression.h |3 + fs/btrfs/props.c |6 -- fs/btrfs/tree-log.c|8 +-- fs/ubifs/tnc.c | 16 +-- include/uapi/linux/isdn/capicmd.h |1 kernel/irq/resend.c|2 net/bridge/br_mdb.c|2 net/core/dev.c |2 net/core/skbuff.c | 19 + net/ipv4/tcp_input.c |2 net/ipv6/ping.c|2 net/sched/sch_hhf.c|2 net/sctp/protocol.c|2 net/sctp/sm_sideeffect.c |2 net/tipc/name_distr.c |3 - 45 files changed, 365 insertions(+), 114 deletions(-) Alex Williamson (1): PCI: Always allow probing with driver_override Andrew F. 
Davis (1): firmware: ti_sci: Always request response from firmware Bjørn Mork (1): cdc_ether: fix rndis support for Mediatek based smartphones Christophe JAILLET (2): ipv6: Fix the link time qualifier of 'ping_v6_proc_exit_net()' sctp: Fix the link time qualifier of 'sctp_ctrlsock_exit()' Christophe Leroy (6): crypto: talitos - check AES key size crypto: talitos - fix CTR alg blocksize crypto: talitos - check data blocksize in ablkcipher. crypto: talitos - fix ECB algs ivsize crypto: talitos - Do not modify req->cryptlen on decryption. crypto: talitos - HMAC SNOOP NO AFEU mode requires SW icv checking. Cong Wang (1): sch_hhf: ensure quantum and hhf_non_hh_weight are non-zero David Sterba (1): btrfs: compression: add helper for type to string conversion Douglas Anderson (1): clk: rockchip: Don't yell about bad mmc phases when getting Eric Biggers (1): isdn/capi: check message length in capi_write() Filipe Manana (1): Btrfs: fix assertion failure during fsync and use of stale transaction Fuqian Huang (1): KVM: x86: work around leak of uninitialized stack contents Greg Kroah-Hartman (2): Revert "MIPS: SiByte: Enable swiotlb for SWARM, LittleSur and BigSur" Linux 4.14.145 Hans de Goede (1): gpiolib: acpi: Add gpiolib_acpi_run_edge_events_on_boot option and blacklist Jean Delvare (1): nvmem: Use the same permissions for eeprom as for nvmem Johannes Thumshirn (1): btrfs: correctly validate compression type Kent Gibson (2): gpio: fix line flag validation in linehandle_create gpio: fix line flag validation in lineevent_create Linus Torvalds (1): x86/build: Add -Wnoaddress-of-packed-member to REALMODE_CFLAGS, to silence GCC9 build warning Mario Limonciello (1): Revert "Bluetooth: btusb: driver to enable the usb-wakeup feature" Muchun Song (1): driver core: Fix use-after-free and double free on glue directory Neal Cardwell (1): tcp: fix tcp_ecn_withdraw_cwr() to clear
Linux 4.19.74
I'm announcing the release of the 4.19.74 kernel. All users of the 4.19 kernel series must upgrade. The updated 4.19.y git tree can be found at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-4.19.y and can be browsed at the normal kernel.org git web browser: https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary thanks, greg k-h Makefile |2 arch/powerpc/include/asm/uaccess.h |1 arch/s390/kvm/interrupt.c | 10 +++ arch/s390/kvm/kvm-s390.c |4 + arch/x86/Makefile |1 arch/x86/kvm/vmx.c |7 +- arch/x86/kvm/x86.c |7 ++ arch/x86/purgatory/Makefile| 35 +++-- drivers/base/core.c| 53 +++ drivers/bluetooth/btusb.c |5 - drivers/clk/rockchip/clk-mmc-phase.c |4 - drivers/crypto/talitos.c | 67 ++--- drivers/firmware/ti_sci.c |8 +- drivers/gpio/gpiolib-acpi.c| 42 ++- drivers/gpio/gpiolib.c | 16 - drivers/gpu/drm/drm_panel_orientation_quirks.c | 12 drivers/gpu/drm/mediatek/mtk_drm_drv.c |5 + drivers/gpu/drm/meson/meson_plane.c| 16 + drivers/iio/adc/stm32-dfsdm-adc.c |4 - drivers/isdn/capi/capi.c | 10 +++ drivers/mtd/nand/raw/mtk_nand.c| 21 +++ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |8 ++ drivers/net/phy/phylink.c |6 +- drivers/net/tun.c | 16 - drivers/net/usb/cdc_ether.c| 13 +++- drivers/net/wireless/rsi/rsi_91x_usb.c |1 drivers/nvmem/core.c | 15 - drivers/pci/pci-driver.c |3 - drivers/platform/x86/pmc_atom.c|8 ++ fs/btrfs/tree-log.c|8 +- fs/ubifs/tnc.c | 16 - include/uapi/linux/isdn/capicmd.h |1 kernel/irq/resend.c|2 kernel/module.c| 24 ++-- net/bridge/br_mdb.c|2 net/core/dev.c |2 net/core/skbuff.c | 19 +++ net/ipv4/tcp_input.c |2 net/ipv6/ping.c|2 net/sched/sch_generic.c|9 ++- net/sched/sch_hhf.c|2 net/sctp/protocol.c|2 net/sctp/sm_sideeffect.c |2 net/tipc/name_distr.c |3 - 44 files changed, 378 insertions(+), 118 deletions(-) Alex Williamson (1): PCI: Always allow probing with driver_override Alexander Duyck (1): ixgbe: Prevent u8 wrapping of ITR value to something less than 10us Andrew F. 
Davis (1): firmware: ti_sci: Always request response from firmware Bjørn Mork (1): cdc_ether: fix rndis support for Mediatek based smartphones Christophe JAILLET (2): ipv6: Fix the link time qualifier of 'ping_v6_proc_exit_net()' sctp: Fix the link time qualifier of 'sctp_ctrlsock_exit()' Christophe Leroy (6): crypto: talitos - check AES key size crypto: talitos - fix CTR alg blocksize crypto: talitos - check data blocksize in ablkcipher. crypto: talitos - fix ECB algs ivsize crypto: talitos - Do not modify req->cryptlen on decryption. crypto: talitos - HMAC SNOOP NO AFEU mode requires SW icv checking. Cong Wang (1): sch_hhf: ensure quantum and hhf_non_hh_weight are non-zero Douglas Anderson (1): clk: rockchip: Don't yell about bad mmc phases when getting Eric Biggers (1): isdn/capi: check message length in capi_write() Eric Dumazet (1): net: sched: fix reordering issues Filipe Manana (1): Btrfs: fix assertion failure during fsync and use of stale transaction Fuqian Huang (1): KVM: x86: work around leak of uninitialized stack contents Greg Kroah-Hartman (1): Linux 4.19.74 Hans de Goede (2): gpiolib: acpi: Add gpiolib_acpi_run_edge_events_on_boot option and blacklist drm: panel-orientation-quirks: Add extra quirk table entry for GPD MicroPC Hui Peng (1): rsi: fix a double free bug in rsi_91x_deinit() Igor Mammedov (1): KVM: s390: kvm_s390_vm_start_migration: check dirty_bitmap before using it as target for memset() Jean Delvare (1): nvmem: Use the same permissions for eeprom as for nvmem Kent Gibson (2): gpio: fix line flag validation in lin
Re: Linux 4.14.145
diff --git a/Makefile b/Makefile index 4aa0dfec9b9b..ce521c48b35e 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 144 +SUBLEVEL = 145 EXTRAVERSION = NAME = Petit Gorille diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 8d4470f44b74..ae4450e891ab 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -800,7 +800,6 @@ config SIBYTE_SWARM select SYS_SUPPORTS_HIGHMEM select SYS_SUPPORTS_LITTLE_ENDIAN select ZONE_DMA32 if 64BIT - select SWIOTLB if ARCH_DMA_ADDR_T_64BIT && PCI config SIBYTE_LITTLESUR bool "Sibyte BCM91250C2-LittleSur" @@ -823,7 +822,6 @@ config SIBYTE_SENTOSA select SYS_HAS_CPU_SB1 select SYS_SUPPORTS_BIG_ENDIAN select SYS_SUPPORTS_LITTLE_ENDIAN - select SWIOTLB if ARCH_DMA_ADDR_T_64BIT && PCI config SIBYTE_BIGSUR bool "Sibyte BCM91480B-BigSur" @@ -837,7 +835,6 @@ config SIBYTE_BIGSUR select SYS_SUPPORTS_HIGHMEM select SYS_SUPPORTS_LITTLE_ENDIAN select ZONE_DMA32 if 64BIT - select SWIOTLB if ARCH_DMA_ADDR_T_64BIT && PCI config SNI_RM bool "SNI RM200/300/400" diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index 9e494f8d9c03..af19d1300a4c 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -25,7 +25,17 @@ extern cpumask_t cpu_sibling_map[]; extern cpumask_t cpu_core_map[]; extern cpumask_t cpu_foreign_map[]; -#define raw_smp_processor_id() (current_thread_info()->cpu) +static inline int raw_smp_processor_id(void) +{ +#if defined(__VDSO__) + extern int vdso_smp_processor_id(void) + __compiletime_error("VDSO should not call smp_processor_id()"); + return vdso_smp_processor_id(); +#else + return current_thread_info()->cpu; +#endif +} +#define raw_smp_processor_id raw_smp_processor_id /* Map from cpu id to sequential logical cpu number. This will only not be idempotent when cpus failed to come on-line. 
*/ diff --git a/arch/mips/sibyte/common/Makefile b/arch/mips/sibyte/common/Makefile index 3ef3fb658136..b3d6bf23a662 100644 --- a/arch/mips/sibyte/common/Makefile +++ b/arch/mips/sibyte/common/Makefile @@ -1,5 +1,4 @@ obj-y := cfe.o -obj-$(CONFIG_SWIOTLB) += dma.o obj-$(CONFIG_SIBYTE_BUS_WATCHER) += bus_watcher.o obj-$(CONFIG_SIBYTE_CFE_CONSOLE) += cfe_console.o obj-$(CONFIG_SIBYTE_TBPROF)+= sb_tbprof.o diff --git a/arch/mips/sibyte/common/dma.c b/arch/mips/sibyte/common/dma.c deleted file mode 100644 index eb47a94f3583.. --- a/arch/mips/sibyte/common/dma.c +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * DMA support for Broadcom SiByte platforms. - * - * Copyright (c) 2018 Maciej W. Rozycki - */ - -#include -#include - -void __init plat_swiotlb_setup(void) -{ - swiotlb_init(1); -} diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index d1a60690e690..0f455fdf822a 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -7,7 +7,9 @@ ccflags-vdso := \ $(filter -I%,$(KBUILD_CFLAGS)) \ $(filter -E%,$(KBUILD_CFLAGS)) \ $(filter -mmicromips,$(KBUILD_CFLAGS)) \ - $(filter -march=%,$(KBUILD_CFLAGS)) + $(filter -march=%,$(KBUILD_CFLAGS)) \ + $(filter -m%-float,$(KBUILD_CFLAGS)) \ + -D__VDSO__ cflags-vdso := $(ccflags-vdso) \ $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \ -O2 -g -fPIC -fno-strict-aliasing -fno-common -fno-builtin -G 0 \ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index bd6d0fb5be9f..51f00c00d7e4 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -280,6 +280,7 @@ extern unsigned long __copy_tofrom_user(void __user *to, static inline unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) { + barrier_nospec(); return __copy_tofrom_user(to, from, n); } #endif /* __powerpc64__ */ diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 5185be314661..28f3796d23c8 
100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1701,6 +1701,16 @@ int s390int_to_s390irq(struct kvm_s390_interrupt *s390int, case KVM_S390_MCHK: irq->u.mchk.mcic = s390int->parm64; break; + case KVM_S390_INT_PFAULT_INIT: + irq->u.ext.ext_params = s390int->parm; + irq->u.ext.ext_params2 = s390int->parm64; + break; + case KVM_S390_RESTART: + case KVM_S390_INT_CLOCK_COMP: + case KVM_S390_INT_CPU_TIMER: + break; + default: + return -EINVAL; } return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 70a446ec347d..a27fb640adbe 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/
Linux 5.2.16
I'm announcing the release of the 5.2.16 kernel. All users of the 5.2 kernel series must upgrade. The updated 5.2.y git tree can be found at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-5.2.y and can be browsed at the normal kernel.org git web browser: https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary thanks, greg k-h Makefile |2 arch/powerpc/include/asm/uaccess.h |1 arch/s390/kvm/interrupt.c | 10 + arch/s390/kvm/kvm-s390.c |4 arch/sparc/kernel/sys_sparc_64.c | 33 ++-- arch/x86/Makefile |1 arch/x86/boot/compressed/acpi.c| 143 +- arch/x86/include/asm/kvm_host.h|2 arch/x86/kernel/ima_arch.c | 12 + arch/x86/kvm/mmu.c | 101 - arch/x86/kvm/svm.c | 42 - arch/x86/kvm/vmx/nested.c | 12 - arch/x86/kvm/x86.c |7 arch/x86/purgatory/Makefile| 35 ++-- drivers/base/core.c| 53 ++ drivers/bluetooth/btusb.c |5 drivers/clk/clk.c | 38 drivers/clk/rockchip/clk-mmc-phase.c |4 drivers/crypto/talitos.c | 70 ++--- drivers/firmware/ti_sci.c |8 - drivers/gpio/gpio-mockup.c |1 drivers/gpio/gpiolib-acpi.c| 42 - drivers/gpio/gpiolib.c | 16 +- drivers/gpu/drm/drm_panel_orientation_quirks.c | 12 + drivers/gpu/drm/i915/intel_dp_mst.c| 10 + drivers/gpu/drm/i915/intel_workarounds.c |5 drivers/gpu/drm/lima/lima_gem.c|2 drivers/gpu/drm/mediatek/mtk_drm_drv.c |5 drivers/gpu/drm/meson/meson_plane.c| 16 ++ drivers/iio/adc/stm32-dfsdm-adc.c | 162 - drivers/iio/adc/stm32-dfsdm.h | 24 ++- drivers/isdn/capi/capi.c | 10 + drivers/mmc/host/bcm2835.c |2 drivers/mmc/host/sdhci-pci-o2micro.c |2 drivers/mmc/host/tmio_mmc.h|1 drivers/mmc/host/tmio_mmc_core.c | 16 +- drivers/mtd/nand/raw/mtk_nand.c| 21 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |7 drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 29 +-- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |3 drivers/net/phy/phylink.c |6 drivers/net/tun.c | 16 +- drivers/net/usb/cdc_ether.c| 10 + drivers/net/wireless/mediatek/mt76/mt7615/main.c |5 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c|2 
drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c |5 drivers/net/wireless/ralink/rt2x00/rt2800lib.c | 18 -- drivers/net/wireless/rsi/rsi_91x_usb.c |1 drivers/pci/pci-driver.c |3 drivers/platform/x86/pcengines-apuv2.c |2 drivers/platform/x86/pmc_atom.c|8 + drivers/regulator/twl-regulator.c | 23 ++ fs/btrfs/tree-log.c| 16 +- fs/ubifs/tnc.c | 16 +- include/linux/phy_fixed.h |1 include/linux/syscalls.h | 19 ++ include/uapi/asm-generic/unistd.h |2 include/uapi/linux/isdn/capicmd.h |1 ipc/util.h | 25 --- kernel/cgroup/cgroup.c | 10 + kernel/irq/resend.c|2 kernel/module.c| 51 -- mm/z3fold.c|7 net/bridge/br_mdb.c|2 net/core/dev.c |2 net/core/skbuff.c | 19 ++ net/core/sock_map.c|3 net/ipv4/tcp_input.c |2 net/ipv6/ping.c|2 net/ipv6/route.c |8 - net/sched/sch_generic.c|9 - net/sched/sch_hhf.c|2 net/sctp/protocol.c|2 net/sctp/sm_sideeffect.c |2 net/
Re: Linux 5.2.16
diff --git a/Makefile b/Makefile index 3c977aa66650..3cec03e93b40 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 2 -SUBLEVEL = 15 +SUBLEVEL = 16 EXTRAVERSION = NAME = Bobtail Squid diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 76f34346b642..8b03eb44e876 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -312,6 +312,7 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) { unsigned long ret; + barrier_nospec(); allow_user_access(to, from, n); ret = __copy_tofrom_user(to, from, n); prevent_user_access(to, from, n); diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 9dde4d7d8704..149525b5df1b 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1978,6 +1978,16 @@ int s390int_to_s390irq(struct kvm_s390_interrupt *s390int, case KVM_S390_MCHK: irq->u.mchk.mcic = s390int->parm64; break; + case KVM_S390_INT_PFAULT_INIT: + irq->u.ext.ext_params = s390int->parm; + irq->u.ext.ext_params2 = s390int->parm64; + break; + case KVM_S390_RESTART: + case KVM_S390_INT_CLOCK_COMP: + case KVM_S390_INT_CPU_TIMER: + break; + default: + return -EINVAL; } return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 28ebd647784c..4934141689d2 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1013,6 +1013,8 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm) /* mark all the pages in active slots as dirty */ for (slotnr = 0; slotnr < slots->used_slots; slotnr++) { ms = slots->memslots + slotnr; + if (!ms->dirty_bitmap) + return -EINVAL; /* * The second half of the bitmap is only used on x86, * and would be wasted otherwise, so we put it to good @@ -4325,7 +4327,7 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp, } case KVM_S390_INTERRUPT: { struct kvm_s390_interrupt s390int; - struct kvm_s390_irq s390irq; + struct 
kvm_s390_irq s390irq = {}; if (copy_from_user(&s390int, argp, sizeof(s390int))) return -EFAULT; diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 9825ca6a6020..5cdca4208647 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -336,25 +336,28 @@ SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second { long err; + if (!IS_ENABLED(CONFIG_SYSVIPC)) + return -ENOSYS; + /* No need for backward compatibility. We can start fresh... */ if (call <= SEMTIMEDOP) { switch (call) { case SEMOP: - err = sys_semtimedop(first, ptr, -(unsigned int)second, NULL); + err = ksys_semtimedop(first, ptr, + (unsigned int)second, NULL); goto out; case SEMTIMEDOP: - err = sys_semtimedop(first, ptr, (unsigned int)second, + err = ksys_semtimedop(first, ptr, (unsigned int)second, (const struct __kernel_timespec __user *) -(unsigned long) fifth); + (unsigned long) fifth); goto out; case SEMGET: - err = sys_semget(first, (int)second, (int)third); + err = ksys_semget(first, (int)second, (int)third); goto out; case SEMCTL: { - err = sys_semctl(first, second, -(int)third | IPC_64, -(unsigned long) ptr); + err = ksys_old_semctl(first, second, + (int)third | IPC_64, + (unsigned long) ptr); goto out; } default: @@ -365,18 +368,18 @@ SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second if (call <= MSGCTL) { switch (call) { case MSGSND: - err = sys_msgsnd(first, ptr, (size_t)second, + err = ksys_msgsnd(first, ptr, (size_t)second, (int)third); goto out; case MSGRCV: - err = sy
Re: Linux 4.19.74
diff --git a/Makefile b/Makefile index 9748fa3704bc..3509e0c6e5ae 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 73 +SUBLEVEL = 74 EXTRAVERSION = NAME = "People's Front" diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 23bea99bf8d5..1ca9e37f7cc9 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -306,6 +306,7 @@ extern unsigned long __copy_tofrom_user(void __user *to, static inline unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) { + barrier_nospec(); return __copy_tofrom_user(to, from, n); } #endif /* __powerpc64__ */ diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index fcb55b02990e..05ea466b9e40 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1879,6 +1879,16 @@ int s390int_to_s390irq(struct kvm_s390_interrupt *s390int, case KVM_S390_MCHK: irq->u.mchk.mcic = s390int->parm64; break; + case KVM_S390_INT_PFAULT_INIT: + irq->u.ext.ext_params = s390int->parm; + irq->u.ext.ext_params2 = s390int->parm64; + break; + case KVM_S390_RESTART: + case KVM_S390_INT_CLOCK_COMP: + case KVM_S390_INT_CPU_TIMER: + break; + default: + return -EINVAL; } return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index fc7de27960e7..e0551c948c59 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -928,6 +928,8 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm) /* mark all the pages in active slots as dirty */ for (slotnr = 0; slotnr < slots->used_slots; slotnr++) { ms = slots->memslots + slotnr; + if (!ms->dirty_bitmap) + return -EINVAL; /* * The second half of the bitmap is only used on x86, * and would be wasted otherwise, so we put it to good @@ -3956,7 +3958,7 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp, } case KVM_S390_INTERRUPT: { struct kvm_s390_interrupt s390int; - struct 
kvm_s390_irq s390irq; + struct kvm_s390_irq s390irq = {}; if (copy_from_user(&s390int, argp, sizeof(s390int))) return -EFAULT; diff --git a/arch/x86/Makefile b/arch/x86/Makefile index ce0d0424a53d..4833dd7e2cc0 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -38,6 +38,7 @@ REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -DDISABLE_BRANCH_PROFILING \ REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -ffreestanding) REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -fno-stack-protector) +REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -Wno-address-of-packed-member) REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), $(cc_stack_align4)) export REALMODE_CFLAGS diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2938b4bcc968..e83f4f6bfdac 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8757,6 +8757,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); gva_t gva = 0; struct vmcs12 *vmcs12; + struct x86_exception e; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -8798,8 +8799,10 @@ static int handle_vmread(struct kvm_vcpu *vcpu) vmx_instruction_info, true, &gva)) return 1; /* _system ok, nested_vmx_check_permission has verified cpl=0 */ - kvm_write_guest_virt_system(vcpu, gva, &field_value, - (is_long_mode(vcpu) ? 8 : 4), NULL); + if (kvm_write_guest_virt_system(vcpu, gva, &field_value, + (is_long_mode(vcpu) ? 8 : 4), + NULL)) + kvm_inject_page_fault(vcpu, &e); } nested_vmx_succeed(vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cbc39751f36b..dbae8415cf4a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5016,6 +5016,13 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val, /* kvm_write_guest_virt_system can pull in tons of pages. 
*/ vcpu->arch.l1tf_flush_l1d = true; + /* +* FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED +* is returned, but our callers are not ready for that and they blindly +* call kvm_inject_page_fault. Ensure that they at least do not leak +* uninitialized kernel stack memory into cr2 and error code. +*/ +
Visitors List
Hello, Good Day to you! Just wanted to check if you would be interested in acquire Batimat 2019 (International Multi-Specialism Trade Show for the Construction Industry 2019) attendee companies to increase prospect flow at your booth - Product launch- Brand awareness - increase Sales - Annual marketing Etc. Information Provided: - Company name, URL, Contact name, Job title, Phone number, fax number, physical address, Industry, Company size, Email address. We also provide customized lists for all your multi-channel marketing planning. Please advice, so I can provide you available counts and cost information for your approval. Look forward to your reply. Best Regards, Sarah Wilson B2B Marketing & Tradeshow Specialist If you do not wish to receive the attendees list, please reply as 'Opt-out'. --- This email has been checked for viruses by Avast antivirus software. https://www.avast.com/antivirus
RE: Usecases for the per-task latency-nice attribute
From: Tim Chen > Sent: 18 September 2019 18:16 ... > Some users are running machine learning batch tasks with AVX512, and have > observed > that these tasks affect the tasks needing a fast response. They have to > rely on manual CPU affinity to separate these tasks. With appropriate > latency hint on task, the scheduler can be taught to separate them. Will (or can) the scheduler pre-empt a low priority process that is spinning in userspace in order to allow a high priority (or low latency) process to run on that cpu? My suspicion is that the process switch can't happen until (at least) the next hardware interrupt - and possibly only a timer tick into the scheduler. David - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
Re: [PATCH] mtd: st_spi_fsm: Use devm_platform_ioremap_resource() in stfsm_probe()
Hi Markus, Markus Elfring wrote on Wed, 18 Sep 2019 14:50:27 +0200: > From: Markus Elfring > Date: Wed, 18 Sep 2019 14:37:34 +0200 > > Simplify this function implementation by using a known wrapper function. > > This issue was detected by using the Coccinelle software. > > Signed-off-by: Markus Elfring > --- > drivers/mtd/devices/st_spi_fsm.c | 8 +--- > 1 file changed, 1 insertion(+), 7 deletions(-) > > diff --git a/drivers/mtd/devices/st_spi_fsm.c > b/drivers/mtd/devices/st_spi_fsm.c > index f4d1667daaf9..5bd1c44ae529 100644 > --- a/drivers/mtd/devices/st_spi_fsm.c > +++ b/drivers/mtd/devices/st_spi_fsm.c > @@ -2034,13 +2034,7 @@ static int stfsm_probe(struct platform_device *pdev) > > platform_set_drvdata(pdev, fsm); > > - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); > - if (!res) { > - dev_err(&pdev->dev, "Resource not found\n"); > - return -ENODEV; > - } > - > - fsm->base = devm_ioremap_resource(&pdev->dev, res); > + fsm->base = devm_platform_ioremap_resource(pdev, 0); > if (IS_ERR(fsm->base)) { > dev_err(&pdev->dev, > "Failed to reserve memory region %pR\n", res); > -- > 2.23.0 > Is this even compile-tested? 'res' is not initialized anymore so you can't use it in the error trace. I suppose you should even drop it from the stack parameters. Thanks, Miquèl
[PATCH v3 03/11] dt-bindings: phy-mtk-tphy: remove unused u3phya_ref clock
The u3phya_ref clock has already been moved into the sub-node and renamed to the ref clock; it is no longer used, so remove it to avoid confusion. Signed-off-by: Chunfeng Yun Reviewed-by: Rob Herring --- v3: no changes v2: add Reviewed-by Rob --- Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt | 4 1 file changed, 4 deletions(-) diff --git a/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt b/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt index 1f4a36dd80e0..48bc1a2e9299 100644 --- a/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt +++ b/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt @@ -13,10 +13,6 @@ Required properties (controller (parent) node): "mediatek,mt8173-u3phy"; make use of "mediatek,generic-tphy-v1" on mt2701 instead and "mediatek,generic-tphy-v2" on mt2712 instead. - - clocks : (deprecated, use port's clocks instead) a list of phandle + - clock-specifier pairs, one for each entry in clock-names - - clock-names : (deprecated, use port's one instead) must contain - "u3phya_ref": for reference clock of usb3.0 analog phy. Required nodes : a sub-node is required for each port the controller provides. Address range information including the usual -- 2.23.0
[PATCH v3 02/11] dt-bindings: phy-mtk-tphy: make the ref clock optional
Make the ref clock optional, so that we no longer need to refer to a fixed-clock in the DTS when the clock of the USB3 PHY comes directly from an oscillator. Signed-off-by: Chunfeng Yun Acked-by: Rob Herring --- v3: add acked-by Rob v2: no changes --- .../devicetree/bindings/phy/phy-mtk-tphy.txt| 13 +++-- 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt b/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt index ce6abfbdfbe1..1f4a36dd80e0 100644 --- a/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt +++ b/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt @@ -34,12 +34,6 @@ Optional properties (controller (parent) node): Required properties (port (child) node): - reg : address and length of the register set for the port. -- clocks : a list of phandle + clock-specifier pairs, one for each - entry in clock-names -- clock-names : must contain - "ref": 48M reference clock for HighSpeed analog phy; and 26M - reference clock for SuperSpeed analog phy, sometimes is - 24M, 25M or 27M, depended on platform. - #phy-cells : should be 1 (See second example) cell after port phandle is phy type from: - PHY_TYPE_USB2 @@ -48,6 +42,13 @@ Required properties (port (child) node): - PHY_TYPE_SATA Optional properties (PHY_TYPE_USB2 port (child) node): +- clocks : a list of phandle + clock-specifier pairs, one for each + entry in clock-names +- clock-names : may contain + "ref": 48M reference clock for HighSpeed anolog phy; and 26M + reference clock for SuperSpeed anolog phy, sometimes is + 24M, 25M or 27M, depended on platform. + - mediatek,eye-src : u32, the value of slew rate calibrate - mediatek,eye-vrt : u32, the selection of VRT reference voltage - mediatek,eye-term: u32, the selection of HS_TX TERM reference voltage -- 2.23.0
[PATCH v3 09/11] phy: phy-mtk-tphy: remove unused u3phya_ref clock
The u3phya_ref clock is already moved into sub-node, and renamed as ref clock, no used anymore now, so remove it, this can avoid confusion when support new platforms Signed-off-by: Chunfeng Yun --- v2~3: no changes --- drivers/phy/mediatek/phy-mtk-tphy.c | 18 -- 1 file changed, 18 deletions(-) diff --git a/drivers/phy/mediatek/phy-mtk-tphy.c b/drivers/phy/mediatek/phy-mtk-tphy.c index 96c62e3a3300..c6424fd2a06d 100644 --- a/drivers/phy/mediatek/phy-mtk-tphy.c +++ b/drivers/phy/mediatek/phy-mtk-tphy.c @@ -312,8 +312,6 @@ struct mtk_phy_instance { struct mtk_tphy { struct device *dev; void __iomem *sif_base; /* only shared sif */ - /* deprecated, use @ref_clk instead in phy instance */ - struct clk *u3phya_ref; /* reference clock of usb3 anolog phy */ const struct mtk_phy_pdata *pdata; struct mtk_phy_instance **phys; int nphys; @@ -921,12 +919,6 @@ static int mtk_phy_init(struct phy *phy) struct mtk_tphy *tphy = dev_get_drvdata(phy->dev.parent); int ret; - ret = clk_prepare_enable(tphy->u3phya_ref); - if (ret) { - dev_err(tphy->dev, "failed to enable u3phya_ref\n"); - return ret; - } - ret = clk_prepare_enable(instance->ref_clk); if (ret) { dev_err(tphy->dev, "failed to enable ref_clk\n"); @@ -992,7 +984,6 @@ static int mtk_phy_exit(struct phy *phy) u2_phy_instance_exit(tphy, instance); clk_disable_unprepare(instance->ref_clk); - clk_disable_unprepare(tphy->u3phya_ref); return 0; } @@ -1127,11 +1118,6 @@ static int mtk_tphy_probe(struct platform_device *pdev) } } - /* it's deprecated, make it optional for backward compatibility */ - tphy->u3phya_ref = devm_clk_get_optional(dev, "u3phya_ref"); - if (IS_ERR(tphy->u3phya_ref)) - return PTR_ERR(tphy->u3phya_ref); - tphy->src_ref_clk = U3P_REF_CLK; tphy->src_coef = U3P_SLEW_RATE_COEF; /* update parameters of slew rate calibrate if exist */ @@ -1178,10 +1164,6 @@ static int mtk_tphy_probe(struct platform_device *pdev) phy_set_drvdata(phy, instance); port++; - /* if deprecated clock is provided, ignore instance's one */ - 
if (tphy->u3phya_ref) - continue; - instance->ref_clk = devm_clk_get_optional(&phy->dev, "ref"); if (IS_ERR(instance->ref_clk)) { dev_err(dev, "failed to get ref_clk(id-%d)\n", port); -- 2.23.0
[PATCH v3 07/11] phy: phy-mtk-tphy: add a property for internal resistance
This is used to tune J-K voltage by internal R (resistance) Signed-off-by: Chunfeng Yun --- v3: change commit log v2: no changes --- drivers/phy/mediatek/phy-mtk-tphy.c | 16 ++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/phy/mediatek/phy-mtk-tphy.c b/drivers/phy/mediatek/phy-mtk-tphy.c index 5afe33621dbc..4a2dc92f10f5 100644 --- a/drivers/phy/mediatek/phy-mtk-tphy.c +++ b/drivers/phy/mediatek/phy-mtk-tphy.c @@ -43,6 +43,8 @@ #define PA0_RG_USB20_INTR_EN BIT(5) #define U3P_USBPHYACR1 0x004 +#define PA1_RG_INTR_CAL GENMASK(23, 19) +#define PA1_RG_INTR_CAL_VAL(x) ((0x1f & (x)) << 19) #define PA1_RG_VRT_SEL GENMASK(14, 12) #define PA1_RG_VRT_SEL_VAL(x) ((0x7 & (x)) << 12) #define PA1_RG_TERM_SEL GENMASK(10, 8) @@ -302,6 +304,7 @@ struct mtk_phy_instance { int eye_src; int eye_vrt; int eye_term; + int intr; int discth; bool bc12_en; }; @@ -853,12 +856,14 @@ static void phy_parse_property(struct mtk_tphy *tphy, &instance->eye_vrt); device_property_read_u32(dev, "mediatek,eye-term", &instance->eye_term); + device_property_read_u32(dev, "mediatek,intr", +&instance->intr); device_property_read_u32(dev, "mediatek,discth", &instance->discth); - dev_dbg(dev, "bc12:%d, src:%d, vrt:%d, term:%d, disc:%d\n", + dev_dbg(dev, "bc12:%d, src:%d, vrt:%d, term:%d, intr:%d, disc:%d\n", instance->bc12_en, instance->eye_src, instance->eye_vrt, instance->eye_term, - instance->discth); + instance->intr, instance->discth); } static void u2_phy_props_set(struct mtk_tphy *tphy, @@ -895,6 +900,13 @@ static void u2_phy_props_set(struct mtk_tphy *tphy, writel(tmp, com + U3P_USBPHYACR1); } + if (instance->intr) { + tmp = readl(com + U3P_USBPHYACR1); + tmp &= ~PA1_RG_INTR_CAL; + tmp |= PA1_RG_INTR_CAL_VAL(instance->intr); + writel(tmp, com + U3P_USBPHYACR1); + } + if (instance->discth) { tmp = readl(com + U3P_USBPHYACR6); tmp &= ~PA6_RG_U2_DISCTH; -- 2.23.0
[PATCH v3 01/11] dt-bindings: phy-mtk-tphy: add two optional properties for u2phy
Add two optional properties, one for tuning J-K voltage by INTR, another for disconnect threshold, both of them are related with connect detection Signed-off-by: Chunfeng Yun --- v3: change commit log v2: change description --- Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt b/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt index a5f7a4f0dbc1..ce6abfbdfbe1 100644 --- a/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt +++ b/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt @@ -52,6 +52,8 @@ Optional properties (PHY_TYPE_USB2 port (child) node): - mediatek,eye-vrt : u32, the selection of VRT reference voltage - mediatek,eye-term: u32, the selection of HS_TX TERM reference voltage - mediatek,bc12: bool, enable BC12 of u2phy if support it +- mediatek,discth : u32, the selection of disconnect threshold +- mediatek,intr: u32, the selection of internal R (resistance) Example: -- 2.23.0
[PATCH v3 10/11] phy: phy-mtk-tphy: add a new reference clock
Usually the digital and analog phys use the same reference clock, but some platforms have two separate reference clocks for each of them, so add another optional clock to support them. In order to keep the clock names consistent with PHY IP's, change the da_ref for analog phy and ref clock for digital phy. Signed-off-by: Chunfeng Yun --- v3: no changes v2: fix typo of analog --- drivers/phy/mediatek/phy-mtk-tphy.c | 19 ++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/phy/mediatek/phy-mtk-tphy.c b/drivers/phy/mediatek/phy-mtk-tphy.c index c6424fd2a06d..cdbcc49f7115 100644 --- a/drivers/phy/mediatek/phy-mtk-tphy.c +++ b/drivers/phy/mediatek/phy-mtk-tphy.c @@ -298,7 +298,8 @@ struct mtk_phy_instance { struct u2phy_banks u2_banks; struct u3phy_banks u3_banks; }; - struct clk *ref_clk;/* reference clock of anolog phy */ + struct clk *ref_clk;/* reference clock of (digital) phy */ + struct clk *da_ref_clk; /* reference clock of analog phy */ u32 index; u8 type; int eye_src; @@ -925,6 +926,13 @@ static int mtk_phy_init(struct phy *phy) return ret; } + ret = clk_prepare_enable(instance->da_ref_clk); + if (ret) { + dev_err(tphy->dev, "failed to enable da_ref\n"); + clk_disable_unprepare(instance->ref_clk); + return ret; + } + switch (instance->type) { case PHY_TYPE_USB2: u2_phy_instance_init(tphy, instance); @@ -984,6 +992,7 @@ static int mtk_phy_exit(struct phy *phy) u2_phy_instance_exit(tphy, instance); clk_disable_unprepare(instance->ref_clk); + clk_disable_unprepare(instance->da_ref_clk); return 0; } @@ -1170,6 +1179,14 @@ static int mtk_tphy_probe(struct platform_device *pdev) retval = PTR_ERR(instance->ref_clk); goto put_child; } + + instance->da_ref_clk = + devm_clk_get_optional(&phy->dev, "da_ref"); + if (IS_ERR(instance->da_ref_clk)) { + dev_err(dev, "failed to get da_ref_clk(id-%d)\n", port); + retval = PTR_ERR(instance->da_ref_clk); + goto put_child; + } } provider = devm_of_phy_provider_register(dev, mtk_phy_xlate); -- 2.23.0
[PATCH v3 04/11] dt-bindings: phy-mtk-tphy: add a new reference clock
Usually the digital and analog phys use the same reference clock, but on some platforms, they are separated, so add another optional clock to support it. In order to keep the clock names consistent with PHY IP's, use the da_ref for analog phy and ref clock for digital phy. Signed-off-by: Chunfeng Yun Acked-by: Rob Herring --- v3: add acked-by Rob v2: fix typo of analog and needed --- Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt b/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt index 48bc1a2e9299..a859b0db4051 100644 --- a/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt +++ b/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt @@ -41,9 +41,12 @@ Optional properties (PHY_TYPE_USB2 port (child) node): - clocks : a list of phandle + clock-specifier pairs, one for each entry in clock-names - clock-names : may contain - "ref": 48M reference clock for HighSpeed anolog phy; and 26M - reference clock for SuperSpeed anolog phy, sometimes is + "ref": 48M reference clock for HighSpeed (digital) phy; and 26M + reference clock for SuperSpeed (digital) phy, sometimes is 24M, 25M or 27M, depended on platform. + "da_ref": the reference clock of analog phy, used if the clocks + of analog and digital phys are separated, otherwise uses + "ref" clock only if needed. - mediatek,eye-src : u32, the value of slew rate calibrate - mediatek,eye-vrt : u32, the selection of VRT reference voltage -- 2.23.0
[PATCH v3 11/11] arm64: dts: mt2712: use non-empty ranges for usb-phy
Use non-empty ranges for usb-phy to make the layout of its registers clearer; Replace deprecated compatible by generic Signed-off-by: Chunfeng Yun --- v3: no changes v2: use generic compatible --- arch/arm64/boot/dts/mediatek/mt2712e.dtsi | 42 --- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi index 43307bad3f0d..e24f2f2f6004 100644 --- a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi @@ -697,30 +697,31 @@ }; u3phy0: usb-phy@11290000 { - compatible = "mediatek,mt2712-u3phy"; - #address-cells = <2>; - #size-cells = <2>; - ranges; + compatible = "mediatek,mt2712-tphy", +"mediatek,generic-tphy-v2"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0 0x11290000 0x9000>; status = "okay"; - u2port0: usb-phy@11290000 { - reg = <0 0x11290000 0 0x700>; + u2port0: usb-phy@0 { + reg = <0x0 0x700>; clocks = <&clk26m>; clock-names = "ref"; #phy-cells = <1>; status = "okay"; }; - u2port1: usb-phy@11298000 { - reg = <0 0x11298000 0 0x700>; + u2port1: usb-phy@8000 { + reg = <0x8000 0x700>; clocks = <&clk26m>; clock-names = "ref"; #phy-cells = <1>; status = "okay"; }; - u3port0: usb-phy@11298700 { - reg = <0 0x11298700 0 0x900>; + u3port0: usb-phy@8700 { + reg = <0x8700 0x900>; clocks = <&clk26m>; clock-names = "ref"; #phy-cells = <1>; @@ -760,30 +761,31 @@ }; u3phy1: usb-phy@112e0000 { - compatible = "mediatek,mt2712-u3phy"; - #address-cells = <2>; - #size-cells = <2>; - ranges; + compatible = "mediatek,mt2712-tphy", +"mediatek,generic-tphy-v2"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0 0x112e0000 0x9000>; status = "okay"; - u2port2: usb-phy@112e0000 { - reg = <0 0x112e0000 0 0x700>; + u2port2: usb-phy@0 { + reg = <0x0 0x700>; clocks = <&clk26m>; clock-names = "ref"; #phy-cells = <1>; status = "okay"; }; - u2port3: usb-phy@112e8000 { - reg = <0 0x112e8000 0 0x700>; + u2port3: usb-phy@8000 { + reg = <0x8000 0x700>; clocks = <&clk26m>; 
clock-names = "ref"; #phy-cells = <1>; status = "okay"; }; - u3port1: usb-phy@112e8700 { - reg = <0 0x112e8700 0 0x900>; + u3port1: usb-phy@8700 { + reg = <0x8700 0x900>; clocks = <&clk26m>; clock-names = "ref"; #phy-cells = <1>; -- 2.23.0
[PATCH v3 05/11] dt-bindings: phy-mtk-tphy: add the properties about address mapping
Add three required properties about the address mapping, including '#address-cells', '#size-cells' and 'ranges' Signed-off-by: Chunfeng Yun Reviewed-by: Rob Herring --- v3: no changes v2: add Reviewed-by Rob --- Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt | 10 ++ 1 file changed, 10 insertions(+) diff --git a/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt b/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt index a859b0db4051..dd75b676b71d 100644 --- a/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt +++ b/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt @@ -14,6 +14,16 @@ Required properties (controller (parent) node): make use of "mediatek,generic-tphy-v1" on mt2701 instead and "mediatek,generic-tphy-v2" on mt2712 instead. +- #address-cells: the number of cells used to represent physical + base addresses. +- #size-cells: the number of cells used to represent the size of an address. +- ranges: the address mapping relationship to the parent, defined with + - empty value: if optional 'reg' is used. + - non-empty value: if optional 'reg' is not used. should set + the child's base address to 0, the physical address + within parent's address space, and the length of + the address map. + Required nodes : a sub-node is required for each port the controller provides. Address range information including the usual 'reg' property is used inside these nodes to describe -- 2.23.0
[PATCH v3 06/11] phy: phy-mtk-tphy: add a property for disconnect threshold
This is used to tune the threshold of disconnect Signed-off-by: Chunfeng Yun --- v2~3: no changes --- drivers/phy/mediatek/phy-mtk-tphy.c | 17 +++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/phy/mediatek/phy-mtk-tphy.c b/drivers/phy/mediatek/phy-mtk-tphy.c index cb2ed3b25068..5afe33621dbc 100644 --- a/drivers/phy/mediatek/phy-mtk-tphy.c +++ b/drivers/phy/mediatek/phy-mtk-tphy.c @@ -60,6 +60,8 @@ #define U3P_USBPHYACR6 0x018 #define PA6_RG_U2_BC11_SW_EN BIT(23) #define PA6_RG_U2_OTG_VBUSCMP_EN BIT(20) +#define PA6_RG_U2_DISCTH GENMASK(7, 4) +#define PA6_RG_U2_DISCTH_VAL(x)((0xf & (x)) << 4) #define PA6_RG_U2_SQTH GENMASK(3, 0) #define PA6_RG_U2_SQTH_VAL(x) (0xf & (x)) @@ -300,6 +302,7 @@ struct mtk_phy_instance { int eye_src; int eye_vrt; int eye_term; + int discth; bool bc12_en; }; @@ -850,9 +853,12 @@ static void phy_parse_property(struct mtk_tphy *tphy, &instance->eye_vrt); device_property_read_u32(dev, "mediatek,eye-term", &instance->eye_term); - dev_dbg(dev, "bc12:%d, src:%d, vrt:%d, term:%d\n", + device_property_read_u32(dev, "mediatek,discth", +&instance->discth); + dev_dbg(dev, "bc12:%d, src:%d, vrt:%d, term:%d, disc:%d\n", instance->bc12_en, instance->eye_src, - instance->eye_vrt, instance->eye_term); + instance->eye_vrt, instance->eye_term, + instance->discth); } static void u2_phy_props_set(struct mtk_tphy *tphy, @@ -888,6 +894,13 @@ static void u2_phy_props_set(struct mtk_tphy *tphy, tmp |= PA1_RG_TERM_SEL_VAL(instance->eye_term); writel(tmp, com + U3P_USBPHYACR1); } + + if (instance->discth) { + tmp = readl(com + U3P_USBPHYACR6); + tmp &= ~PA6_RG_U2_DISCTH; + tmp |= PA6_RG_U2_DISCTH_VAL(instance->discth); + writel(tmp, com + U3P_USBPHYACR6); + } } static int mtk_phy_init(struct phy *phy) -- 2.23.0
[PATCH v3 08/11] phy: phy-mtk-tphy: make the ref clock optional
Sometimes the reference clock of the USB3 PHY comes directly from an oscillator; making the clock optional means there is no longer any need to refer to a fixed-clock in the DTS. Signed-off-by: Chunfeng Yun --- v2~3: no changes --- drivers/phy/mediatek/phy-mtk-tphy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/mediatek/phy-mtk-tphy.c b/drivers/phy/mediatek/phy-mtk-tphy.c index 4a2dc92f10f5..96c62e3a3300 100644 --- a/drivers/phy/mediatek/phy-mtk-tphy.c +++ b/drivers/phy/mediatek/phy-mtk-tphy.c @@ -1182,7 +1182,7 @@ static int mtk_tphy_probe(struct platform_device *pdev) if (tphy->u3phya_ref) continue; - instance->ref_clk = devm_clk_get(&phy->dev, "ref"); + instance->ref_clk = devm_clk_get_optional(&phy->dev, "ref"); if (IS_ERR(instance->ref_clk)) { dev_err(dev, "failed to get ref_clk(id-%d)\n", port); retval = PTR_ERR(instance->ref_clk); -- 2.23.0
Re: [PATCH v2 2/2] reset: Reset controller driver for Intel LGM SoC
Hi Thomas, On 9/19/2019 4:36 PM, Langer, Thomas wrote: Hi Dilip, -Original Message- From: devicetree-ow...@vger.kernel.org On Behalf Of Dilip Kota Sent: Donnerstag, 19. September 2019 10:06 To: Martin Blumenstingl Cc: Chuan Hua, Lei ; Kim, Cheol Yong ; devicet...@vger.kernel.org; linux- ker...@vger.kernel.org; p.za...@pengutronix.de; Wu, Qiming ; r...@kernel.org; Hauke Mehrtens Subject: Re: [PATCH v2 2/2] reset: Reset controller driver for Intel LGM SoC Hi Martin, On 9/12/2019 2:38 PM, Dilip Kota wrote: Re-sending the mail, because of delivery failure. sorry for the spam. Hi Martin, On 9/6/2019 4:53 AM, Martin Blumenstingl wrote: Hi, On Thu, Sep 5, 2019 at 4:38 AM Chuan Hua, Lei wrote: [...] I'm not surprised that we got some of the IP block layout for the VRX200 RCU "wrong" - all "documentation" we have is the old Lantiq UGW (BSP). with proper documentation (as in a "public datasheet for the SoC") it would be easy to spot these mistakes (at least I assume that the quality of the Infineon / Lantiq datasheets is excellent). back to reset-intel-syscon: assigning only one job to the RCU hardware is a good idea (in my opinion). that brings up a question: why do we need the "syscon" compatible for the RCU node? this is typically used when registers are accessed by another IP block and the other driver has to access these registers as well. does this mean that there's more hidden in the RCU registers? As I mentioned, some other misc registers are put into RCU even they don't belong to reset functions. OK, just be aware that there are also rules for syscon compatible drivers, see for example: [0] if Rob (dt-bindings maintainer) is happy with the documentation in patch 1 then I'm fine with it as well. for my own education I would appreciate if you could describe these "other misc registers" with a few sentences (I assume that this can also help Rob) For LGM, RCU is clean. There would be no MISC register after software's feedback. 
These misc registers will be moved to chiptop/misc groups(implemented by syscon). For legacy SoC, we do have a lot MISC registers for different SoCs. OK, I think I understand now: chiptop != RCU so RCU really only has one purpose: handling resets while chiptop manages all the random bits does this means we don't need RCU to match "syscon"? If we don't support legacy SoC with the same driver, we don't need syscon, just regmap. Regmap is a must for us since we will use regmap proxy to implement secure rest via secure processor. I think we should drop the syscon compatible for LGM then even for the legacy SoCs the reset controller should not have a syscon compatible: instead it should have a syscon parent (as the current "lantiq,xrx200-reset" binding requires and as suggested by Rob for another IP block: [0]) I am not sure if syscon parent really matches hardware implementation. In all our Networking SoCs, chiptop is kind of misc register collection. Some registers can't belong to any particular group, or they need to work together with other modules(therefore, these misc registers would be accessed by two or more modules). However, chiptop is not a hardware module. indeed, chiptop should not have any child nodes (based on your explanation). I was referring to VRX200 where the RCU syscon has various children (one child node for each hardware module that's part of RCU: reset controller, 2x USB PHY, ...) back to LGM: you said that the LGM RCU registers only contain the reset controller. thus I see no need for the syscon compatible keeping regmap is great in my opinion because it's a nice API and gets rid of some boilerplate even better if it makes things easier for accessing the secure processor [...] 4. Code not optimized and intel internal review not assessed. insights from you (like the issue with the reset callback) are very valuable - this shows that we should focus on having one driver. 
Based on the above findings, I would suggest reset-lantiq.c to move to reset-intel-syscon.c my concern with having two separate drivers is that it will be hard to migrate from reset-lantiq to the "optimized" reset-intel-syscon driver. I don't have access to the datasheets for the any Lantiq/Intel SoC (VRX200 and even older). so debugging issues after switching from one driver to another is tedious because I cannot tell which part of the driver is causing a problem (it's either "all code from driver A" vs "all code from driver B", meaning it's hard to narrow it down). with separate commits/patches that are improving the reset-lantiq driver I can do git bisect to find the cause of a problem on the older SoCs (VRX200 for example) Our internal version supports XRX350/XRX500/PRX300(MIPS based) and latest Lighting Mountain(X86 based). Migration to reset-intel-syscon.c should be straight forward. what about the _reset callback on the XRX350/XRX500/PRX300 SoCs - do they only use level resets (_assert and _deassert) or are some reset lines using re
[PATCH -net] zd1211rw: zd_usb: Use "%zu" to format size_t
On 32-bit: drivers/net/wireless/zydas/zd1211rw/zd_usb.c: In function ‘check_read_regs’: drivers/net/wireless/zydas/zd1211rw/zd_def.h:18:25: warning: format ‘%ld’ expects argument of type ‘long int’, but argument 6 has type ‘size_t’ {aka ‘unsigned int’} [-Wformat=] dev_printk(level, dev, "%s() " fmt, __func__, ##args) ^~~ drivers/net/wireless/zydas/zd1211rw/zd_def.h:22:4: note: in expansion of macro ‘dev_printk_f’ dev_printk_f(KERN_DEBUG, dev, fmt, ## args) ^~~~ drivers/net/wireless/zydas/zd1211rw/zd_usb.c:1635:3: note: in expansion of macro ‘dev_dbg_f’ dev_dbg_f(zd_usb_dev(usb), ^ drivers/net/wireless/zydas/zd1211rw/zd_usb.c:1636:51: note: format string is defined here "error: actual length %d less than expected %ld\n", ~~^ %d Fixes: 84b0b66352470e64 ("zd1211rw: zd_usb: Use struct_size() helper") Signed-off-by: Geert Uytterhoeven --- drivers/net/wireless/zydas/zd1211rw/zd_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c index 4e44ea8c652d65aa..7b5c2fe5bd4d9cde 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c @@ -1633,7 +1633,7 @@ static bool check_read_regs(struct zd_usb *usb, struct usb_req_read_regs *req, */ if (rr->length < struct_size(regs, regs, count)) { dev_dbg_f(zd_usb_dev(usb), -"error: actual length %d less than expected %ld\n", +"error: actual length %d less than expected %zu\n", rr->length, struct_size(regs, regs, count)); return false; } -- 2.17.1
[PATCH v8 1/7] nfc: pn533: i2c: "pn532" as dt compatible string
It is favourable to have one unified compatible string for devices that have multiple interfaces. So this adds simply "pn532" as the devicetree binding compatible string and makes a note that the old ones are deprecated. Cc: Johan Hovold Cc: Simon Horman Signed-off-by: Lars Poeschel --- Changes in v6: - Rebased the patch series on v5.3-rc5 Changes in v3: - This patch is new in v3 drivers/nfc/pn533/i2c.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/nfc/pn533/i2c.c b/drivers/nfc/pn533/i2c.c index 1832cd921ea7..1abd40398a5a 100644 --- a/drivers/nfc/pn533/i2c.c +++ b/drivers/nfc/pn533/i2c.c @@ -245,6 +245,11 @@ static int pn533_i2c_remove(struct i2c_client *client) } static const struct of_device_id of_pn533_i2c_match[] = { + { .compatible = "nxp,pn532", }, + /* +* NOTE: The use of the compatibles with the trailing "...-i2c" is +* deprecated and will be removed. +*/ { .compatible = "nxp,pn533-i2c", }, { .compatible = "nxp,pn532-i2c", }, {}, -- 2.23.0
[PATCH v8 4/7] nfc: pn533: Split pn533 init & nfc_register
There is a problem in the initialisation and setup of the pn533: It registers with nfc too early. It could happen, that it finished registering with nfc and someone starts using it. But setup of the pn533 is not yet finished. Bad or at least unintended things could happen. So I split out nfc registering (and unregistering) to separate functions that have to be called late in probe then. Cc: Johan Hovold Cc: Claudiu Beznea Signed-off-by: Lars Poeschel --- Changes in v7: - Remove an unneeded rc variable initialization - Corrected goto error to err_clean in pn533_usb_probe Changes in v6: - Rebased the patch series on v5.3-rc5 Changes in v5: - This patch is new in v5 drivers/nfc/pn533/i2c.c | 17 +- drivers/nfc/pn533/pn533.c | 66 --- drivers/nfc/pn533/pn533.h | 11 --- drivers/nfc/pn533/usb.c | 14 ++--- 4 files changed, 60 insertions(+), 48 deletions(-) diff --git a/drivers/nfc/pn533/i2c.c b/drivers/nfc/pn533/i2c.c index 1abd40398a5a..e9e5a1ec8857 100644 --- a/drivers/nfc/pn533/i2c.c +++ b/drivers/nfc/pn533/i2c.c @@ -193,12 +193,10 @@ static int pn533_i2c_probe(struct i2c_client *client, phy->i2c_dev = client; i2c_set_clientdata(client, phy); - priv = pn533_register_device(PN533_DEVICE_PN532, -PN533_NO_TYPE_B_PROTOCOLS, + priv = pn53x_common_init(PN533_DEVICE_PN532, PN533_PROTO_REQ_ACK_RESP, phy, &i2c_phy_ops, NULL, -&phy->i2c_dev->dev, -&client->dev); +&phy->i2c_dev->dev); if (IS_ERR(priv)) { r = PTR_ERR(priv); @@ -220,13 +218,17 @@ static int pn533_i2c_probe(struct i2c_client *client, if (r) goto fn_setup_err; - return 0; + r = pn53x_register_nfc(priv, PN533_NO_TYPE_B_PROTOCOLS, &client->dev); + if (r) + goto fn_setup_err; + + return r; fn_setup_err: free_irq(client->irq, phy); irq_rqst_err: - pn533_unregister_device(phy->priv); + pn53x_common_clean(phy->priv); return r; } @@ -239,7 +241,8 @@ static int pn533_i2c_remove(struct i2c_client *client) free_irq(client->irq, phy); - pn533_unregister_device(phy->priv); + pn53x_unregister_nfc(phy->priv); + 
pn53x_common_clean(phy->priv); return 0; } diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c index 64836c727aee..e5d5e4c83a04 100644 --- a/drivers/nfc/pn533/pn533.c +++ b/drivers/nfc/pn533/pn533.c @@ -2590,14 +2590,12 @@ int pn533_finalize_setup(struct pn533 *dev) } EXPORT_SYMBOL_GPL(pn533_finalize_setup); -struct pn533 *pn533_register_device(u32 device_type, - u32 protocols, +struct pn533 *pn53x_common_init(u32 device_type, enum pn533_protocol_type protocol_type, void *phy, struct pn533_phy_ops *phy_ops, struct pn533_frame_ops *fops, - struct device *dev, - struct device *parent) + struct device *dev) { struct pn533 *priv; int rc = -ENOMEM; @@ -2638,43 +2636,18 @@ struct pn533 *pn533_register_device(u32 device_type, skb_queue_head_init(&priv->fragment_skb); INIT_LIST_HEAD(&priv->cmd_queue); - - priv->nfc_dev = nfc_allocate_device(&pn533_nfc_ops, protocols, - priv->ops->tx_header_len + - PN533_CMD_DATAEXCH_HEAD_LEN, - priv->ops->tx_tail_len); - if (!priv->nfc_dev) { - rc = -ENOMEM; - goto destroy_wq; - } - - nfc_set_parent_dev(priv->nfc_dev, parent); - nfc_set_drvdata(priv->nfc_dev, priv); - - rc = nfc_register_device(priv->nfc_dev); - if (rc) - goto free_nfc_dev; - return priv; -free_nfc_dev: - nfc_free_device(priv->nfc_dev); - -destroy_wq: - destroy_workqueue(priv->wq); error: kfree(priv); return ERR_PTR(rc); } -EXPORT_SYMBOL_GPL(pn533_register_device); +EXPORT_SYMBOL_GPL(pn53x_common_init); -void pn533_unregister_device(struct pn533 *priv) +void pn53x_common_clean(struct pn533 *priv) { struct pn533_cmd *cmd, *n; - nfc_unregister_device(priv->nfc_dev); - nfc_free_device(priv->nfc_dev); - flush_delayed_work(&priv->poll_work); destroy_workqueue(priv->wq); @@ -2689,8 +2662,37 @@ void pn533_unregister_device(struct pn533 *priv) kfree(priv); } -EXPORT_SYMBOL_GPL(pn533_unregister_device); +EXPORT_SYMBOL_GPL(pn53x_common_clean); + +int pn53x_register_nfc(struct pn533 *priv, u32 protocols, +