Re: [PATCH] sdhci: arasan: Add runtime PM support

2018-09-25 Thread Ulf Hansson
On 18 September 2018 at 17:04, Manish Narani  wrote:
> Add runtime PM support in Arasan SDHCI driver.

According to the patch you seem to deploy support for it, but since
you call pm_runtime_forbid() in ->probe(), this means in practice that
the code becomes unused, at least until user-space decides to change
it.

Does it mean that you haven't tested the code or that there are some problems?

In either case, it would be good to know so please add this
information to the changelog.

>
> Signed-off-by: Manish Narani 
> ---
>  drivers/mmc/host/sdhci-of-arasan.c | 80 
> +-
>  1 file changed, 78 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/mmc/host/sdhci-of-arasan.c 
> b/drivers/mmc/host/sdhci-of-arasan.c
> index a40bcc2..370ada5 100644
> --- a/drivers/mmc/host/sdhci-of-arasan.c
> +++ b/drivers/mmc/host/sdhci-of-arasan.c
> @@ -23,6 +23,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -30,6 +31,7 @@
>  #include "cqhci.h"
>  #include "sdhci-pltfm.h"
>
> +#define SDHCI_ARASAN_AUTOSUSPEND_DELAY 2000 /* ms */
>  #define SDHCI_ARASAN_VENDOR_REGISTER   0x78
>  #define SDHCI_ARASAN_CQE_BASE_ADDR 0x200
>  #define VENDOR_ENHANCED_STROBE BIT(0)
> @@ -363,6 +365,70 @@ static const struct sdhci_pltfm_data 
> sdhci_arasan_cqe_pdata = {
> SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN,
>  };
>
> +#ifdef CONFIG_PM
> +static int sdhci_arasan_runtime_suspend(struct device *dev)
> +{
> +   struct platform_device *pdev = to_platform_device(dev);
> +   struct sdhci_host *host = platform_get_drvdata(pdev);
> +   struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
> +   struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host);
> +   int ret;
> +
> +   if (sdhci_arasan->has_cqe) {
> +   ret = cqhci_suspend(host->mmc);
> +   if (ret)
> +   return ret;
> +   }
> +
> +   ret = sdhci_runtime_suspend_host(host);
> +   if (ret)
> +   return ret;
> +
> +   if (host->tuning_mode != SDHCI_TUNING_MODE_3)
> +   mmc_retune_needed(host->mmc);
> +
> +   clk_disable(pltfm_host->clk);
> +   clk_disable(sdhci_arasan->clk_ahb);
> +
> +   return 0;
> +}
> +
> +static int sdhci_arasan_runtime_resume(struct device *dev)
> +{
> +   struct platform_device *pdev = to_platform_device(dev);
> +   struct sdhci_host *host = platform_get_drvdata(pdev);
> +   struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
> +   struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host);
> +   int ret;
> +
> +   ret = clk_enable(sdhci_arasan->clk_ahb);
> +   if (ret) {
> +   dev_err(dev, "Cannot enable AHB clock.\n");
> +   return ret;
> +   }
> +
> +   ret = clk_enable(pltfm_host->clk);
> +   if (ret) {
> +   dev_err(dev, "Cannot enable SD clock.\n");
> +   return ret;
> +   }
> +
> +   ret = sdhci_runtime_resume_host(host);
> +   if (ret)
> +   goto out;
> +
> +   if (sdhci_arasan->has_cqe)
> +   return cqhci_resume(host->mmc);
> +
> +   return 0;
> +out:
> +   clk_disable(pltfm_host->clk);
> +   clk_disable(sdhci_arasan->clk_ahb);
> +
> +   return ret;
> +}
> +#endif /* ! CONFIG_PM */
> +
>  #ifdef CONFIG_PM_SLEEP
>  /**
>   * sdhci_arasan_suspend - Suspend method for the driver
> @@ -455,8 +521,10 @@ static int sdhci_arasan_resume(struct device *dev)
>  }
>  #endif /* ! CONFIG_PM_SLEEP */
>
> -static SIMPLE_DEV_PM_OPS(sdhci_arasan_dev_pm_ops, sdhci_arasan_suspend,
> -sdhci_arasan_resume);
> +static const struct dev_pm_ops sdhci_arasan_dev_pm_ops = {
> +   SET_SYSTEM_SLEEP_PM_OPS(sdhci_arasan_suspend, sdhci_arasan_resume)
> +   SET_RUNTIME_PM_OPS(sdhci_arasan_runtime_suspend,
> +  sdhci_arasan_runtime_resume, NULL) };
>
>  static const struct of_device_id sdhci_arasan_of_match[] = {
> /* SoC-specific compatible strings w/ soc_ctl_map */
> @@ -821,6 +889,14 @@ static int sdhci_arasan_probe(struct platform_device 
> *pdev)
> if (ret)
> goto err_add_host;
>
> +   pm_runtime_set_active(&pdev->dev);
> +   pm_runtime_enable(&pdev->dev);
> +   pm_runtime_set_autosuspend_delay(&pdev->dev,
> +SDHCI_ARASAN_AUTOSUSPEND_DELAY);
> +   pm_runtime_mark_last_busy(&pdev->dev);
> +   pm_runtime_use_autosuspend(&pdev->dev);
> +   pm_runtime_forbid(&pdev->dev);

When sdhci_arasan_suspend|resume() gets called, which calls into
sdhci_suspend|resume_host(), we require that the host is already
runtime resumed. I am guessing that's why you are calling
pm_runtime_forbid() here?

To me, it looks like you may want to look into using
pm_runtime_force_suspend|resume() from the system suspend/resume
callbacks. The tricky part seems to be to man

Re: [LKP] [mm, oom] 6209f6fc62: general_protection_fault:#[##]

2018-09-25 Thread Rong Chen




On 09/25/2018 02:06 PM, Michal Hocko wrote:

On Tue 25-09-18 13:48:20, kernel test robot wrote:

FYI, we noticed the following commit (built with gcc-7):

commit: 6209f6fc62835d84c2a92d237588a114e39436ce ("mm, oom: rework mmap_exit vs. 
oom_reaper synchronization")
https://github.com/0day-ci/linux 
UPDATE-20180911-024633/Tetsuo-Handa/mm-oom-Fix-unnecessary-killing-of-additional-processes/20180910-163452

Do you have a msg-id to the patch that has been tested?


message_id: 20180910125513.311-2-mho...@kernel.org

Best Regards,
Rong Chen


Re: [PATCH v2] net: macb: Clean 64b dma addresses if they are not detected

2018-09-25 Thread Nicolas Ferre

On 25/09/2018 at 08:32, Michal Simek wrote:

Clear the ADDR64 DMA bit in the DMACFG register when HW_DMA_CAP_64B is
not detected on a 64-bit system.
The issue was observed when the bootloader (U-Boot) does not check the
macb feature in the DCFG6 register (DAW64_OFFSET) and enables 64-bit DMA
support by default. The macb driver then reads the DMACFG register back
and only ever sets the 64-bit DMA configuration bit; it never clears it.

Signed-off-by: Michal Simek 


Acked-by: Nicolas Ferre 

Thanks Michal. Best regards,
  Nicolas


---

Changes in v2:
- Clean reg at the first place - Edgar
- Update commit message

  drivers/net/ethernet/cadence/macb_main.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/cadence/macb_main.c 
b/drivers/net/ethernet/cadence/macb_main.c
index 16e4ef7d7185..ed8a5c53467e 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -2160,6 +2160,7 @@ static void macb_configure_dma(struct macb *bp)
else
dmacfg &= ~GEM_BIT(TXCOEN);
  
+		dmacfg &= ~GEM_BIT(ADDR64);

  #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
if (bp->hw_dma_cap & HW_DMA_CAP_64B)
dmacfg |= GEM_BIT(ADDR64);




--
Nicolas Ferre


Re: [PATCH v2 2/2] treewide: use bus_find_device_by_fwnode

2018-09-25 Thread Greg Kroah-Hartman
On Tue, Sep 25, 2018 at 10:58:48AM +0530, Silesh C V wrote:
> Use bus_find_device_by_fwnode helper to find the device having a
> specific firmware node on a bus.
> ---


No signed off by?

:(


Re: [PATCH V2 05/27] mmc: mmci: move mmci next cookie to mci host

2018-09-25 Thread Ludovic BARRE




On 09/24/2018 08:46 PM, Ulf Hansson wrote:

On 21 September 2018 at 11:45, Ludovic Barre  wrote:

From: Ludovic Barre 

This patch moves next cookie to mmci host structure to
share same cookie management between all variants.


I would suggest squashing this with patch 4, as I think it makes
more sense to keep these changes together.


ok





Signed-off-by: Ludovic Barre 
---
  drivers/mmc/host/mmci.c | 10 --
  drivers/mmc/host/mmci.h |  2 ++
  2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 6de7c8d..43f2a1a 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -418,7 +418,6 @@ static void mmci_init_sg(struct mmci_host *host, struct 
mmc_data *data)
  struct mmci_dmae_next {
 struct dma_async_tx_descriptor *desc;
 struct dma_chan *chan;
-   s32 cookie;
  };

  struct mmci_dmae_priv {
@@ -449,7 +448,7 @@ static int mmci_dma_setup(struct mmci_host *host)
  "tx");

 /* initialize pre request cookie */
-   dmae->next_data.cookie = 1;
+   host->next_cookie = 1;

 /*
  * If only an RX channel is specified, the driver will
@@ -716,7 +715,7 @@ static void mmci_get_next_data(struct mmci_host *host, 
struct mmc_data *data)
 struct mmci_dmae_priv *dmae = host->dma_priv;
 struct mmci_dmae_next *next = &dmae->next_data;

-   WARN_ON(data->host_cookie && data->host_cookie != next->cookie);
+   WARN_ON(data->host_cookie && data->host_cookie != host->next_cookie);
 WARN_ON(!data->host_cookie && (next->desc || next->chan));

 dmae->desc_current = next->desc;
@@ -728,9 +727,7 @@ static void mmci_get_next_data(struct mmci_host *host, 
struct mmc_data *data)
  static void mmci_pre_request(struct mmc_host *mmc, struct mmc_request *mrq)
  {
 struct mmci_host *host = mmc_priv(mmc);
-   struct mmci_dmae_priv *dmae = host->dma_priv;
 struct mmc_data *data = mrq->data;
-   struct mmci_dmae_next *nd = &dmae->next_data;

 if (!data)
 return;
@@ -741,7 +738,8 @@ static void mmci_pre_request(struct mmc_host *mmc, struct 
mmc_request *mrq)
 return;

 if (!mmci_dma_prep_next(host, data))
-   data->host_cookie = ++nd->cookie < 0 ? 1 : nd->cookie;
+   data->host_cookie = ++host->next_cookie < 0 ?
+   1 : host->next_cookie;
  }

  static void mmci_post_request(struct mmc_host *mmc, struct mmc_request *mrq,
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index 1e9a45b..5bb1ce2 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -318,5 +318,7 @@ struct mmci_host {
 int (*get_rx_fifocnt)(struct mmci_host *h, u32 status, int remain);

 void*dma_priv;
+
+   s32 next_cookie;
  };

--
2.7.4



Kind regards
Uffe



Re: [PATCH v2] staging: vc04_services: Update TODO re: arm64

2018-09-25 Thread Stefan Wahren


Hi Aymen,

there is more than one TODO file in vc04_services (it contains 3 drivers),
so here is my suggestion for the subject:

staging: bcm2835-camera: Update TODO

Am 25.09.2018 um 01:34 schrieb Aymen Qader:
> Remove the TODO item to remove manual cache flushing from bulk_receive.
> This was previously done in this commit:
>
> (7e8dbdea) staging: bcm2835-camera: Remove explicit cache flush
> operations

please use this format, which should also make scripts/checkpatch.pl happy:

commit 7e8dbea41032 ("staging: bcm2835-camera: Remove explicit cache
flush operations ")

>
> Also remove the unnecessary asm/cacheflush.h header.

This doesn't match the subject. Maybe a separate patch would be
better here.

Thanks
Stefan

>
> Signed-off-by: Aymen Qader 
> ---
> v2: Make commit message clearer
>
>  drivers/staging/vc04_services/bcm2835-camera/TODO | 6 --
>  drivers/staging/vc04_services/bcm2835-camera/mmal-vchiq.c | 1 -
>  2 files changed, 7 deletions(-)
>
> diff --git a/drivers/staging/vc04_services/bcm2835-camera/TODO 
> b/drivers/staging/vc04_services/bcm2835-camera/TODO
> index cefce72d814f..6c2b4ffe4996 100644
> --- a/drivers/staging/vc04_services/bcm2835-camera/TODO
> +++ b/drivers/staging/vc04_services/bcm2835-camera/TODO
> @@ -15,9 +15,3 @@ padding in the V4L2 spec, but that padding doesn't match 
> what the
>  hardware can do.  If we exposed the native padding requirements
>  through the V4L2 "multiplanar" formats, the firmware would have one
>  less copy it needed to do.
> -
> -3) Port to ARM64
> -
> -The bulk_receive() does some manual cache flushing that are 32-bit ARM
> -only, which we should convert to proper cross-platform APIs.
> -
> diff --git a/drivers/staging/vc04_services/bcm2835-camera/mmal-vchiq.c 
> b/drivers/staging/vc04_services/bcm2835-camera/mmal-vchiq.c
> index 51e5b04ff0f5..6e5c1d4ee122 100644
> --- a/drivers/staging/vc04_services/bcm2835-camera/mmal-vchiq.c
> +++ b/drivers/staging/vc04_services/bcm2835-camera/mmal-vchiq.c
> @@ -21,7 +21,6 @@
>  #include 
>  #include 
>  #include 
> -#include 
>  #include 
>  
>  #include "mmal-common.h"


[PATCH -V5 RESEND 02/21] swap: Add __swap_duplicate_locked()

2018-09-25 Thread Huang Ying
The part of __swap_duplicate() with lock held is separated into a new
function __swap_duplicate_locked(), because we will add more logic
about the PMD swap mapping into __swap_duplicate() and keep most of the
PTE swap mapping related logic in __swap_duplicate_locked().

This is just mechanical code refactoring; there is no functional change
in this patch.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
Cc: Daniel Jordan 
---
 mm/swapfile.c | 63 +--
 1 file changed, 35 insertions(+), 28 deletions(-)

diff --git a/mm/swapfile.c b/mm/swapfile.c
index 97a1bd1a7c9a..6a570ef00fa7 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3436,32 +3436,12 @@ void si_swapinfo(struct sysinfo *val)
spin_unlock(&swap_lock);
 }
 
-/*
- * Verify that a swap entry is valid and increment its swap map count.
- *
- * Returns error code in following case.
- * - success -> 0
- * - swp_entry is invalid -> EINVAL
- * - swp_entry is migration entry -> EINVAL
- * - swap-cache reference is requested but there is already one. -> EEXIST
- * - swap-cache reference is requested but the entry is not used. -> ENOENT
- * - swap-mapped reference requested but needs continued swap count. -> ENOMEM
- */
-static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
+static int __swap_duplicate_locked(struct swap_info_struct *p,
+  unsigned long offset, unsigned char usage)
 {
-   struct swap_info_struct *p;
-   struct swap_cluster_info *ci;
-   unsigned long offset;
unsigned char count;
unsigned char has_cache;
-   int err = -EINVAL;
-
-   p = get_swap_device(entry);
-   if (!p)
-   goto out;
-
-   offset = swp_offset(entry);
-   ci = lock_cluster_or_swap_info(p, offset);
+   int err = 0;
 
count = p->swap_map[offset];
 
@@ -3471,12 +3451,11 @@ static int __swap_duplicate(swp_entry_t entry, unsigned 
char usage)
 */
if (unlikely(swap_count(count) == SWAP_MAP_BAD)) {
err = -ENOENT;
-   goto unlock_out;
+   goto out;
}
 
has_cache = count & SWAP_HAS_CACHE;
count &= ~SWAP_HAS_CACHE;
-   err = 0;
 
if (usage == SWAP_HAS_CACHE) {
 
@@ -3503,11 +3482,39 @@ static int __swap_duplicate(swp_entry_t entry, unsigned 
char usage)
 
p->swap_map[offset] = count | has_cache;
 
-unlock_out:
+out:
+   return err;
+}
+
+/*
+ * Verify that a swap entry is valid and increment its swap map count.
+ *
+ * Returns error code in following case.
+ * - success -> 0
+ * - swp_entry is invalid -> EINVAL
+ * - swp_entry is migration entry -> EINVAL
+ * - swap-cache reference is requested but there is already one. -> EEXIST
+ * - swap-cache reference is requested but the entry is not used. -> ENOENT
+ * - swap-mapped reference requested but needs continued swap count. -> ENOMEM
+ */
+static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
+{
+   struct swap_info_struct *p;
+   struct swap_cluster_info *ci;
+   unsigned long offset;
+   int err = -EINVAL;
+
+   p = get_swap_device(entry);
+   if (!p)
+   goto out;
+
+   offset = swp_offset(entry);
+   ci = lock_cluster_or_swap_info(p, offset);
+   err = __swap_duplicate_locked(p, offset, usage);
unlock_cluster_or_swap_info(p, ci);
+
+   put_swap_device(p);
 out:
-   if (p)
-   put_swap_device(p);
return err;
 }
 
-- 
2.16.4



[PATCH -V5 RESEND 00/21] swap: Swapout/swapin THP in one piece

2018-09-25 Thread Huang Ying
Hi, Andrew, could you help me to check whether the overall design is
reasonable?

Hi, Hugh, Shaohua, Minchan and Rik, could you help me to review the
swap part of the patchset?  Especially [02/21], [03/21], [04/21],
[05/21], [06/21], [07/21], [08/21], [09/21], [10/21], [11/21],
[12/21], [20/21], [21/21].

Hi, Andrea and Kirill, could you help me to review the THP part of the
patchset?  Especially [01/21], [07/21], [09/21], [11/21], [13/21],
[15/21], [16/21], [17/21], [18/21], [19/21], [20/21].

Hi, Johannes and Michal, could you help me to review the cgroup part
of the patchset?  Especially [14/21].

And for all, Any comment is welcome!

This patchset is based on the 2018-09-20 head of mmotm/master.

This is the final step of THP (Transparent Huge Page) swap
optimization.  After the first and second step, the splitting huge
page is delayed from almost the first step of swapout to after swapout
has been finished.  In this step, we avoid splitting THP for swapout
and swapout/swapin the THP in one piece.

We tested the patchset with the vm-scalability benchmark swap-w-seq test
case, with 16 processes.  The test case forks 16 processes.  Each
process allocates a large anonymous memory range, and writes it from
beginning to end for 8 rounds.  The first round will swapout, while the
remaining rounds will swapin and swapout.  The test is done on a Xeon
E5 v3 system; the swap device used is a RAM-simulated PMEM (persistent
memory) device.  The test results are as follows:

            base                 optimized
---------------- --------------------------
         %stddev     %change         %stddev
             \          |                \
   1417897 ±  2%    +992.8%   15494673        vm-scalability.throughput
   1020489 ±  4%   +1091.2%   12156349        vmstat.swap.si
   1255093 ±  3%    +940.3%   13056114        vmstat.swap.so
   1259769 ±  7%   +1818.3%   24166779        meminfo.AnonHugePages
  28021761           -10.7%   25018848 ±  2%  meminfo.AnonPages
  64080064 ±  4%     -95.6%    2787565 ± 33%  interrupts.CAL:Function_call_interrupts
     13.91 ±  5%      -13.8       0.10 ± 27%  perf-profile.children.cycles-pp.native_queued_spin_lock_slowpath

Where, the score of benchmark (bytes written per second) improved
992.8%.  The swapout/swapin throughput improved 1008% (from about
2.17GB/s to 24.04GB/s).  The performance difference is huge.  In base
kernel, for the first round of writing, the THP is swapout and split,
so in the remaining rounds, there is only normal page swapin and
swapout.  While in optimized kernel, the THP is kept after first
swapout, so THP swapin and swapout is used in the remaining rounds.
This shows the key benefit to swapout/swapin THP in one piece, the THP
will be kept instead of being split.  The meminfo information verifies
this: in the base kernel only 4.5% of anonymous pages are THP during the
test, while in the optimized kernel, that is 96.6%.  The TLB flushing IPI
(represented as interrupts.CAL:Function_call_interrupts) reduced
95.6%, while cycles for spinlock reduced from 13.9% to 0.1%.  These
are performance benefit of THP swapout/swapin too.

Below is the description for all steps of THP swap optimization.

Recently, the performance of storage devices has improved so fast that
we cannot saturate the disk bandwidth with a single logical CPU when
doing page swapping, even on a high-end server machine, because the
performance of the storage devices has improved faster than that of a
single logical CPU.  And it seems that the trend will not change in the
near future.  On the other hand, THP becomes more and more popular
because of increased memory size.  So it becomes necessary to optimize
THP swap performance.

The advantages to swapout/swapin a THP in one piece include:

- Batch various swap operations for the THP.  Many operations need to
  be done once per THP instead of per normal page, for example,
  allocating/freeing the swap space, writing/reading the swap space,
  flushing TLB, page fault, etc.  This will improve the performance of
  the THP swap greatly.

- The THP swap space read/write will be large sequential IO (2M on
  x86_64).  It is particularly helpful for the swapin, which are
  usually 4k random IO.  This will improve the performance of the THP
  swap too.

- It will help reduce memory fragmentation, especially when the THP is
  heavily used by the applications.  The THP order pages will be freed
  up after THP swapout.

- It will improve the THP utilization on the system with the swap
  turned on.  Because the speed for khugepaged to collapse the normal
  pages into the THP is quite slow.  After the THP is split during the
  swapout, it will take quite long time for the normal pages to
  collapse back into the THP after being swapin.  The high THP
  utilization helps the efficiency of the page based memory management
  too.

There are some concerns regarding THP swapin, mainly because possible
enlarged read/write IO size (for swapout/swapin) may put more overhead
on the storage device.  

[PATCH -V5 RESEND 04/21] swap: Support PMD swap mapping in put_swap_page()

2018-09-25 Thread Huang Ying
Previously, during swapout, all PMD page mapping will be split and
replaced with PTE swap mapping.  And when clearing the SWAP_HAS_CACHE
flag for the huge swap cluster in put_swap_page(), the huge swap
cluster will be split.  Now, during swapout, the PMD page mappings to
the THP will be changed to PMD swap mappings to the corresponding swap
cluster.  So when clearing the SWAP_HAS_CACHE flag, the huge swap
cluster will only be split if the PMD swap mapping count is 0.
Otherwise, we will keep it as the huge swap cluster.  So that we can
swapin a THP in one piece later.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
Cc: Daniel Jordan 
---
 mm/swapfile.c | 31 ---
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/mm/swapfile.c b/mm/swapfile.c
index 138968b79de5..553d2551b35a 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1314,6 +1314,15 @@ void swap_free(swp_entry_t entry)
 
 /*
  * Called after dropping swapcache to decrease refcnt to swap entries.
+ *
+ * When a THP is added into swap cache, the SWAP_HAS_CACHE flag will
+ * be set in the swap_map[] of all swap entries in the huge swap
+ * cluster backing the THP.  This huge swap cluster will not be split
+ * unless the THP is split even if its PMD swap mapping count dropped
+ * to 0.  Later, when the THP is removed from swap cache, the
+ * SWAP_HAS_CACHE flag will be cleared in the swap_map[] of all swap
+ * entries in the huge swap cluster.  And this huge swap cluster will
+ * be split if its PMD swap mapping count is 0.
  */
 void put_swap_page(struct page *page, swp_entry_t entry)
 {
@@ -1332,15 +1341,23 @@ void put_swap_page(struct page *page, swp_entry_t entry)
 
ci = lock_cluster_or_swap_info(si, offset);
if (size == SWAPFILE_CLUSTER) {
-   VM_BUG_ON(!cluster_is_huge(ci));
+   VM_BUG_ON(!IS_ALIGNED(offset, size));
map = si->swap_map + offset;
-   for (i = 0; i < SWAPFILE_CLUSTER; i++) {
-   val = map[i];
-   VM_BUG_ON(!(val & SWAP_HAS_CACHE));
-   if (val == SWAP_HAS_CACHE)
-   free_entries++;
+   /*
+* No PMD swap mapping, the swap cluster will be freed
+* if all swap entries becoming free, otherwise the
+* huge swap cluster will be split.
+*/
+   if (!cluster_swapcount(ci)) {
+   for (i = 0; i < SWAPFILE_CLUSTER; i++) {
+   val = map[i];
+   VM_BUG_ON(!(val & SWAP_HAS_CACHE));
+   if (val == SWAP_HAS_CACHE)
+   free_entries++;
+   }
+   if (free_entries != SWAPFILE_CLUSTER)
+   cluster_clear_huge(ci);
}
-   cluster_clear_huge(ci);
if (free_entries == SWAPFILE_CLUSTER) {
unlock_cluster_or_swap_info(si, ci);
spin_lock(&si->lock);
-- 
2.16.4



[PATCH -V5 RESEND 19/21] swap: Support PMD swap mapping in common path

2018-09-25 Thread Huang Ying
The original code only handles PMD migration entries; it is revised to
also support PMD swap mappings.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
Cc: Daniel Jordan 
---
 fs/proc/task_mmu.c | 12 +---
 mm/gup.c   | 36 
 mm/huge_memory.c   |  7 ---
 mm/mempolicy.c |  2 +-
 4 files changed, 34 insertions(+), 23 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 5ea1d64cb0b4..2d968523c57b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -972,7 +972,7 @@ static inline void clear_soft_dirty_pmd(struct 
vm_area_struct *vma,
pmd = pmd_clear_soft_dirty(pmd);
 
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
-   } else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
+   } else if (is_swap_pmd(pmd)) {
pmd = pmd_swp_clear_soft_dirty(pmd);
set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
}
@@ -1302,9 +1302,8 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long 
addr, unsigned long end,
if (pm->show_pfn)
frame = pmd_pfn(pmd) +
((addr & ~PMD_MASK) >> PAGE_SHIFT);
-   }
-#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
-   else if (is_swap_pmd(pmd)) {
+   } else if (IS_ENABLED(CONFIG_HAVE_PMD_SWAP_ENTRY) &&
+  is_swap_pmd(pmd)) {
swp_entry_t entry = pmd_to_swp_entry(pmd);
unsigned long offset;
 
@@ -1317,10 +1316,9 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long 
addr, unsigned long end,
flags |= PM_SWAP;
if (pmd_swp_soft_dirty(pmd))
flags |= PM_SOFT_DIRTY;
-   VM_BUG_ON(!is_pmd_migration_entry(pmd));
-   page = migration_entry_to_page(entry);
+   if (is_pmd_migration_entry(pmd))
+   page = migration_entry_to_page(entry);
}
-#endif
 
if (page && page_mapcount(page) == 1)
flags |= PM_MMAP_EXCLUSIVE;
diff --git a/mm/gup.c b/mm/gup.c
index 1abc8b4afff6..b35b7729b1b7 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -216,6 +216,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct 
*vma,
spinlock_t *ptl;
struct page *page;
struct mm_struct *mm = vma->vm_mm;
+   swp_entry_t entry;
 
pmd = pmd_offset(pudp, address);
/*
@@ -243,18 +244,22 @@ static struct page *follow_pmd_mask(struct vm_area_struct 
*vma,
if (!pmd_present(pmdval)) {
if (likely(!(flags & FOLL_MIGRATION)))
return no_page_table(vma, flags);
-   VM_BUG_ON(thp_migration_supported() &&
- !is_pmd_migration_entry(pmdval));
-   if (is_pmd_migration_entry(pmdval))
+   entry = pmd_to_swp_entry(pmdval);
+   if (thp_migration_supported() && is_migration_entry(entry)) {
pmd_migration_entry_wait(mm, pmd);
-   pmdval = READ_ONCE(*pmd);
-   /*
-* MADV_DONTNEED may convert the pmd to null because
-* mmap_sem is held in read mode
-*/
-   if (pmd_none(pmdval))
+   pmdval = READ_ONCE(*pmd);
+   /*
+* MADV_DONTNEED may convert the pmd to null because
+* mmap_sem is held in read mode
+*/
+   if (pmd_none(pmdval))
+   return no_page_table(vma, flags);
+   goto retry;
+   }
+   if (IS_ENABLED(CONFIG_THP_SWAP) && !non_swap_entry(entry))
return no_page_table(vma, flags);
-   goto retry;
+   WARN_ON(1);
+   return no_page_table(vma, flags);
}
if (pmd_devmap(pmdval)) {
ptl = pmd_lock(mm, pmd);
@@ -276,11 +281,18 @@ static struct page *follow_pmd_mask(struct vm_area_struct 
*vma,
return no_page_table(vma, flags);
}
if (unlikely(!pmd_present(*pmd))) {
+   entry = pmd_to_swp_entry(*pmd);
spin_unlock(ptl);
if (likely(!(flags & FOLL_MIGRATION)))
return no_page_table(vma, flags);
-   pmd_migration_entry_wait(mm, pmd);
-   goto retry_locked;
+   if (thp_migration_supported() && is_migration_entry(entry)) {
+   pmd_migration_entry_wait(mm, pmd);
+   goto retry_locked;
+   }
+   

[PATCH -V5 RESEND 06/21] swap: Support PMD swap mapping when splitting huge PMD

2018-09-25 Thread Huang Ying
A huge PMD needs to be split when zapping a part of the PMD mapping, etc.
If the PMD mapping is a swap mapping, we need to split it too.  This
patch implements the support for this.  This is similar to splitting
the PMD page mapping, except we need to decrease the PMD swap mapping
count for the huge swap cluster too.  If the PMD swap mapping count
becomes 0, the huge swap cluster will be split.

Notice: is_huge_zero_pmd() and pmd_page() don't work well with swap
PMD, so pmd_present() check is called before them.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
Cc: Daniel Jordan 
---
 include/linux/huge_mm.h |  4 
 include/linux/swap.h|  6 ++
 mm/huge_memory.c| 48 +++-
 mm/swapfile.c   | 32 
 4 files changed, 85 insertions(+), 5 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 99c19b06d9a4..0f3e1739986f 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -226,6 +226,10 @@ static inline bool is_huge_zero_page(struct page *page)
return READ_ONCE(huge_zero_page) == page;
 }
 
+/*
+ * is_huge_zero_pmd() must be called after checking pmd_present(),
+ * otherwise, it may report false positive for PMD swap entry.
+ */
 static inline bool is_huge_zero_pmd(pmd_t pmd)
 {
return is_huge_zero_page(pmd_page(pmd));
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 88eb06eb1444..5bd54b6fd4a1 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -618,11 +618,17 @@ static inline swp_entry_t get_swap_page(struct page *page)
 
 #ifdef CONFIG_THP_SWAP
 extern int split_swap_cluster(swp_entry_t entry);
+extern int split_swap_cluster_map(swp_entry_t entry);
 #else
 static inline int split_swap_cluster(swp_entry_t entry)
 {
return 0;
 }
+
+static inline int split_swap_cluster_map(swp_entry_t entry)
+{
+   return 0;
+}
 #endif
 
 #ifdef CONFIG_MEMCG
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 63edf18ca9cc..3ea7318fcdcd 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1604,6 +1604,40 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, 
pmd_t pmd)
return 0;
 }
 
+/* Convert a PMD swap mapping to a set of PTE swap mappings */
+static void __split_huge_swap_pmd(struct vm_area_struct *vma,
+ unsigned long haddr,
+ pmd_t *pmd)
+{
+   struct mm_struct *mm = vma->vm_mm;
+   pgtable_t pgtable;
+   pmd_t _pmd;
+   swp_entry_t entry;
+   int i, soft_dirty;
+
+   entry = pmd_to_swp_entry(*pmd);
+   soft_dirty = pmd_soft_dirty(*pmd);
+
+   split_swap_cluster_map(entry);
+
+   pgtable = pgtable_trans_huge_withdraw(mm, pmd);
+   pmd_populate(mm, &_pmd, pgtable);
+
+   for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE, entry.val++) {
+   pte_t *pte, ptent;
+
+   pte = pte_offset_map(&_pmd, haddr);
+   VM_BUG_ON(!pte_none(*pte));
+   ptent = swp_entry_to_pte(entry);
+   if (soft_dirty)
+   ptent = pte_swp_mksoft_dirty(ptent);
+   set_pte_at(mm, haddr, pte, ptent);
+   pte_unmap(pte);
+   }
+   smp_wmb(); /* make pte visible before pmd */
+   pmd_populate(mm, pmd, pgtable);
+}
+
 /*
  * Return true if we do MADV_FREE successfully on entire pmd page.
  * Otherwise, return false.
@@ -2070,7 +2104,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct 
*vma, pmd_t *pmd,
VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
VM_BUG_ON_VMA(vma->vm_start > haddr, vma);
VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma);
-   VM_BUG_ON(!is_pmd_migration_entry(*pmd) && !pmd_trans_huge(*pmd)
+   VM_BUG_ON(!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd)
&& !pmd_devmap(*pmd));
 
count_vm_event(THP_SPLIT_PMD);
@@ -2094,7 +2128,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct 
*vma, pmd_t *pmd,
put_page(page);
add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
return;
-   } else if (is_huge_zero_pmd(*pmd)) {
+   } else if (pmd_present(*pmd) && is_huge_zero_pmd(*pmd)) {
/*
 * FIXME: Do we want to invalidate secondary mmu by calling
 * mmu_notifier_invalidate_range() see comments below inside
@@ -2138,6 +2172,9 @@ static void __split_huge_pmd_locked(struct vm_area_struct 
*vma, pmd_t *pmd,
page = pfn_to_page(swp_offset(entry));
} else
 #endif
+   if (IS_ENABLED(CONFIG_THP_SWAP) && is_swap_pmd(old_pmd))
+   return __split_huge_swap_pmd(vma, haddr, pmd);
+   else
page = pmd_page(old

[PATCH -V5 RESEND 01/21] swap: Enable PMD swap operations for CONFIG_THP_SWAP

2018-09-25 Thread Huang Ying
Currently, "the swap entry" in the page tables is used for a number of
things outside of actual swap, like page migration, etc.  We support
the THP/PMD "swap entry" for page migration currently and the
functions behind this are tied to page migration's config
option (CONFIG_ARCH_ENABLE_THP_MIGRATION).

But, we also need them for THP swap optimization.  So a new config
option (CONFIG_HAVE_PMD_SWAP_ENTRY) is added.  It is enabled when
either CONFIG_ARCH_ENABLE_THP_MIGRATION or CONFIG_THP_SWAP is enabled.
And PMD swap entry functions are tied to this new config option
instead.  Some functions enabled by CONFIG_ARCH_ENABLE_THP_MIGRATION
are for page migration only; they are still enabled only for that.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
Cc: Daniel Jordan 
---
 arch/x86/include/asm/pgtable.h |  2 +-
 include/asm-generic/pgtable.h  |  2 +-
 include/linux/swapops.h| 44 ++
 mm/Kconfig |  8 
 4 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 40616e805292..e830ab345551 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1333,7 +1333,7 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
 }
 
-#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+#ifdef CONFIG_HAVE_PMD_SWAP_ENTRY
 static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
 {
return pmd_set_flags(pmd, _PAGE_SWP_SOFT_DIRTY);
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 5657a20e0c59..eb1e9d17371b 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -675,7 +675,7 @@ static inline void ptep_modify_prot_commit(struct mm_struct 
*mm,
 #endif
 
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
-#ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
+#ifndef CONFIG_HAVE_PMD_SWAP_ENTRY
 static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
 {
return pmd;
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 22af9d8a84ae..79ccbf8789d5 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -259,17 +259,7 @@ static inline int is_write_migration_entry(swp_entry_t 
entry)
 
 #endif
 
-struct page_vma_mapped_walk;
-
-#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
-extern void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
-   struct page *page);
-
-extern void remove_migration_pmd(struct page_vma_mapped_walk *pvmw,
-   struct page *new);
-
-extern void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd);
-
+#ifdef CONFIG_HAVE_PMD_SWAP_ENTRY
 static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd)
 {
swp_entry_t arch_entry;
@@ -287,6 +277,28 @@ static inline pmd_t swp_entry_to_pmd(swp_entry_t entry)
arch_entry = __swp_entry(swp_type(entry), swp_offset(entry));
return __swp_entry_to_pmd(arch_entry);
 }
+#else
+static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd)
+{
+   return swp_entry(0, 0);
+}
+
+static inline pmd_t swp_entry_to_pmd(swp_entry_t entry)
+{
+   return __pmd(0);
+}
+#endif
+
+struct page_vma_mapped_walk;
+
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+extern void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
+   struct page *page);
+
+extern void remove_migration_pmd(struct page_vma_mapped_walk *pvmw,
+   struct page *new);
+
+extern void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd);
 
 static inline int is_pmd_migration_entry(pmd_t pmd)
 {
@@ -307,16 +319,6 @@ static inline void remove_migration_pmd(struct 
page_vma_mapped_walk *pvmw,
 
 static inline void pmd_migration_entry_wait(struct mm_struct *m, pmd_t *p) { }
 
-static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd)
-{
-   return swp_entry(0, 0);
-}
-
-static inline pmd_t swp_entry_to_pmd(swp_entry_t entry)
-{
-   return __pmd(0);
-}
-
 static inline int is_pmd_migration_entry(pmd_t pmd)
 {
return 0;
diff --git a/mm/Kconfig b/mm/Kconfig
index c6a0d82af45f..b7f7fb145d0f 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -424,6 +424,14 @@ config THP_SWAP
 
  For selection by architectures with reasonable THP sizes.
 
+#
+# "PMD swap entry" in the page table is used both for migration and
+# actual swap.
+#
+config HAVE_PMD_SWAP_ENTRY
+   def_bool y
+   depends on THP_SWAP || ARCH_ENABLE_THP_MIGRATION
+
 config TRANSPARENT_HUGE_PAGECACHE
def_bool y
depends on TRANSPARENT_HUGEPAGE
-- 
2.16.4



[PATCH -V5 RESEND 17/21] swap: Support PMD swap mapping for MADV_WILLNEED

2018-09-25 Thread Huang Ying
During MADV_WILLNEED, for a PMD swap mapping, if THP swapin is enabled
for the VMA, the whole swap cluster will be swapped in.  Otherwise, the
huge swap cluster and the PMD swap mapping will be split and fall back
to PTE swap mapping.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
Cc: Daniel Jordan 
---
 mm/madvise.c | 26 --
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/mm/madvise.c b/mm/madvise.c
index 07ef599d4255..608c5ae201c6 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -196,14 +196,36 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned 
long start,
pte_t *orig_pte;
struct vm_area_struct *vma = walk->private;
unsigned long index;
+   swp_entry_t entry;
+   struct page *page;
+   pmd_t pmdval;
+
+   pmdval = *pmd;
+   if (IS_ENABLED(CONFIG_THP_SWAP) && is_swap_pmd(pmdval) &&
+   !is_pmd_migration_entry(pmdval)) {
+   entry = pmd_to_swp_entry(pmdval);
+   if (!transparent_hugepage_swapin_enabled(vma)) {
+   if (!split_swap_cluster(entry, 0))
+   split_huge_swap_pmd(vma, pmd, start, pmdval);
+   } else {
+   page = read_swap_cache_async(entry,
+GFP_HIGHUSER_MOVABLE,
+vma, start, false);
+   if (page) {
+   /* The swap cluster has been split under us */
+   if (!PageTransHuge(page))
+   split_huge_swap_pmd(vma, pmd, start,
+   pmdval);
+   put_page(page);
+   }
+   }
+   }
 
if (pmd_none_or_trans_huge_or_clear_bad(pmd))
return 0;
 
for (index = start; index != end; index += PAGE_SIZE) {
pte_t pte;
-   swp_entry_t entry;
-   struct page *page;
spinlock_t *ptl;
 
orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl);
-- 
2.16.4



[PATCH -V5 RESEND 05/21] swap: Support PMD swap mapping in free_swap_and_cache()/swap_free()

2018-09-25 Thread Huang Ying
When a PMD swap mapping is removed from a huge swap cluster, for
example, when unmapping a memory range mapped with a PMD swap mapping,
free_swap_and_cache() will be called to decrease the reference count
of the huge swap cluster.  free_swap_and_cache() may also free or
split the huge swap cluster, and free the corresponding THP in swap
cache if necessary.  swap_free() is similar, and shares most of its
implementation with free_swap_and_cache().  This patch revises
free_swap_and_cache() and swap_free() to implement this.

If the swap cluster has been split already, for example, because of
failing to allocate a THP during swapin, we just decrease the
reference count of all swap slots by one.

Otherwise, we will decrease the reference count of all swap slots and
the PMD swap mapping count in cluster_count() by one.  When the
corresponding THP isn't in swap cache, if the PMD swap mapping count
becomes 0, the huge swap cluster will be split, and if all swap counts
become 0, the huge swap cluster will be freed.  When the corresponding
THP is in swap cache, if every swap_map[offset] == SWAP_HAS_CACHE, we
will try to delete the THP from swap cache, which will cause the THP
and the huge swap cluster to be freed.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
Cc: Daniel Jordan 
---
 arch/s390/mm/pgtable.c |   2 +-
 include/linux/swap.h   |   9 +--
 kernel/power/swap.c|   4 +-
 mm/madvise.c   |   2 +-
 mm/memory.c|   4 +-
 mm/shmem.c |   6 +-
 mm/swapfile.c  | 171 ++---
 7 files changed, 149 insertions(+), 49 deletions(-)

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index f2cc7da473e4..ffd4b68adbb3 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -675,7 +675,7 @@ static void ptep_zap_swap_entry(struct mm_struct *mm, 
swp_entry_t entry)
 
dec_mm_counter(mm, mm_counter(page));
}
-   free_swap_and_cache(entry);
+   free_swap_and_cache(entry, 1);
 }
 
 void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 3149cdb52e6d..88eb06eb1444 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -453,9 +453,9 @@ extern int add_swap_count_continuation(swp_entry_t, gfp_t);
 extern void swap_shmem_alloc(swp_entry_t);
 extern int swap_duplicate(swp_entry_t *entry, int entry_size);
 extern int swapcache_prepare(swp_entry_t entry, int entry_size);
-extern void swap_free(swp_entry_t);
+extern void swap_free(swp_entry_t entry, int entry_size);
 extern void swapcache_free_entries(swp_entry_t *entries, int n);
-extern int free_swap_and_cache(swp_entry_t);
+extern int free_swap_and_cache(swp_entry_t entry, int entry_size);
 extern int swap_type_of(dev_t, sector_t, struct block_device **);
 extern unsigned int count_swap_pages(int, int);
 extern sector_t map_swap_page(struct page *, struct block_device **);
@@ -509,7 +509,8 @@ static inline void show_swap_cache_info(void)
 {
 }
 
-#define free_swap_and_cache(e) ({(is_migration_entry(e) || 
is_device_private_entry(e));})
+#define free_swap_and_cache(e, s)  \
+   ({(is_migration_entry(e) || is_device_private_entry(e)); })
 #define swapcache_prepare(e, s)
\
({(is_migration_entry(e) || is_device_private_entry(e)); })
 
@@ -527,7 +528,7 @@ static inline int swap_duplicate(swp_entry_t *swp, int 
entry_size)
return 0;
 }
 
-static inline void swap_free(swp_entry_t swp)
+static inline void swap_free(swp_entry_t swp, int entry_size)
 {
 }
 
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index d7f6c1a288d3..0275df84ed3d 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -182,7 +182,7 @@ sector_t alloc_swapdev_block(int swap)
offset = swp_offset(get_swap_page_of_type(swap));
if (offset) {
if (swsusp_extents_insert(offset))
-   swap_free(swp_entry(swap, offset));
+   swap_free(swp_entry(swap, offset), 1);
else
return swapdev_block(swap, offset);
}
@@ -206,7 +206,7 @@ void free_all_swap_pages(int swap)
ext = rb_entry(node, struct swsusp_extent, node);
rb_erase(node, &swsusp_extents);
for (offset = ext->start; offset <= ext->end; offset++)
-   swap_free(swp_entry(swap, offset));
+   swap_free(swp_entry(swap, offset), 1);
 
kfree(ext);
}
diff --git a/mm/madvise.c b/mm/madvise.c
index 972a9eaa898b..6fff1c1d2009 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -349,7 +349,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long 
addr,
  

[PATCH -V5 RESEND 18/21] swap: Support PMD swap mapping in mincore()

2018-09-25 Thread Huang Ying
During mincore(), for a PMD swap mapping, the swap cache will be
looked up.  If the resulting page isn't a compound page, the PMD swap
mapping will be split and fall back to PTE swap mapping processing.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
Cc: Daniel Jordan 
---
 mm/mincore.c | 37 +++--
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/mm/mincore.c b/mm/mincore.c
index a66f2052c7b1..a2a66c3c8c6a 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -48,7 +48,8 @@ static int mincore_hugetlb(pte_t *pte, unsigned long hmask, 
unsigned long addr,
  * and is up to date; i.e. that no page-in operation would be required
  * at this time if an application were to map and access this page.
  */
-static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
+static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff,
+ bool *compound)
 {
unsigned char present = 0;
struct page *page;
@@ -86,6 +87,8 @@ static unsigned char mincore_page(struct address_space 
*mapping, pgoff_t pgoff)
 #endif
if (page) {
present = PageUptodate(page);
+   if (compound)
+   *compound = PageCompound(page);
put_page(page);
}
 
@@ -103,7 +106,8 @@ static int __mincore_unmapped_range(unsigned long addr, 
unsigned long end,
 
pgoff = linear_page_index(vma, addr);
for (i = 0; i < nr; i++, pgoff++)
-   vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
+   vec[i] = mincore_page(vma->vm_file->f_mapping,
+ pgoff, NULL);
} else {
for (i = 0; i < nr; i++)
vec[i] = 0;
@@ -127,14 +131,36 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long 
addr, unsigned long end,
pte_t *ptep;
unsigned char *vec = walk->private;
int nr = (end - addr) >> PAGE_SHIFT;
+   swp_entry_t entry;
 
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
-   memset(vec, 1, nr);
+   unsigned char val = 1;
+   bool compound;
+
+   if (IS_ENABLED(CONFIG_THP_SWAP) && is_swap_pmd(*pmd)) {
+   entry = pmd_to_swp_entry(*pmd);
+   if (!non_swap_entry(entry)) {
+   val = mincore_page(swap_address_space(entry),
+  swp_offset(entry),
+  &compound);
+   /*
+* The huge swap cluster has been
+* split under us
+*/
+   if (!compound) {
+   __split_huge_swap_pmd(vma, addr, pmd);
+   spin_unlock(ptl);
+   goto fallback;
+   }
+   }
+   }
+   memset(vec, val, nr);
spin_unlock(ptl);
goto out;
}
 
+fallback:
if (pmd_trans_unstable(pmd)) {
__mincore_unmapped_range(addr, end, vma, vec);
goto out;
@@ -150,8 +176,7 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long 
addr, unsigned long end,
else if (pte_present(pte))
*vec = 1;
else { /* pte is a swap entry */
-   swp_entry_t entry = pte_to_swp_entry(pte);
-
+   entry = pte_to_swp_entry(pte);
if (non_swap_entry(entry)) {
/*
 * migration or hwpoison entries are always
@@ -161,7 +186,7 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long 
addr, unsigned long end,
} else {
 #ifdef CONFIG_SWAP
*vec = mincore_page(swap_address_space(entry),
-   swp_offset(entry));
+   swp_offset(entry), NULL);
 #else
WARN_ON(1);
*vec = 1;
-- 
2.16.4



[PATCH -V5 RESEND 20/21] swap: create PMD swap mapping when unmap the THP

2018-09-25 Thread Huang Ying
This is the final step of the THP swapin support.  When reclaiming an
anonymous THP, after allocating the huge swap cluster and adding the
THP into swap cache, the PMD page mapping will be changed to the
mapping to the swap space.  Previously, the PMD page mapping was split
before being changed.  In this patch, the unmap code is enhanced not
to split the PMD mapping, but to create a PMD swap mapping to replace
it instead.  So later, when the SWAP_HAS_CACHE flag is cleared in the
last step of swapout, the huge swap cluster will be kept instead of
being split, and during swapin, the huge swap cluster will be read in
one piece into a THP.  That is, the THP will not be split during
swapout/swapin.  This can eliminate the overhead of
splitting/collapsing, and reduce the page fault count, etc.  But more
importantly, the utilization of THP is improved greatly; that is, many
more THPs will be kept when swapping is used, so that we can take full
advantage of THP, including its high performance for swapout/swapin.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
Cc: Daniel Jordan 
---
 include/linux/huge_mm.h | 11 +++
 mm/huge_memory.c| 30 ++
 mm/rmap.c   | 43 ++-
 mm/vmscan.c |  6 +-
 4 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 6586c1bfac21..8cbce31bc090 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -405,6 +405,8 @@ static inline gfp_t alloc_hugepage_direct_gfpmask(struct 
vm_area_struct *vma)
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+struct page_vma_mapped_walk;
+
 #ifdef CONFIG_THP_SWAP
 extern void __split_huge_swap_pmd(struct vm_area_struct *vma,
  unsigned long haddr,
@@ -412,6 +414,8 @@ extern void __split_huge_swap_pmd(struct vm_area_struct 
*vma,
 extern int split_huge_swap_pmd(struct vm_area_struct *vma, pmd_t *pmd,
   unsigned long address, pmd_t orig_pmd);
 extern int do_huge_pmd_swap_page(struct vm_fault *vmf, pmd_t orig_pmd);
+extern bool set_pmd_swap_entry(struct page_vma_mapped_walk *pvmw,
+   struct page *page, unsigned long address, pmd_t pmdval);
 
 static inline bool transparent_hugepage_swapin_enabled(
struct vm_area_struct *vma)
@@ -453,6 +457,13 @@ static inline int do_huge_pmd_swap_page(struct vm_fault 
*vmf, pmd_t orig_pmd)
return 0;
 }
 
+static inline bool set_pmd_swap_entry(struct page_vma_mapped_walk *pvmw,
+ struct page *page, unsigned long address,
+ pmd_t pmdval)
+{
+   return false;
+}
+
 static inline bool transparent_hugepage_swapin_enabled(
struct vm_area_struct *vma)
 {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8eb16d34ea44..2d263771b614 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1884,6 +1884,36 @@ int do_huge_pmd_swap_page(struct vm_fault *vmf, pmd_t 
orig_pmd)
count_vm_event(THP_SWPIN_FALLBACK);
goto fallback;
 }
+
+bool set_pmd_swap_entry(struct page_vma_mapped_walk *pvmw, struct page *page,
+   unsigned long address, pmd_t pmdval)
+{
+   struct vm_area_struct *vma = pvmw->vma;
+   struct mm_struct *mm = vma->vm_mm;
+   pmd_t swp_pmd;
+   swp_entry_t entry = { .val = page_private(page) };
+
+   if (swap_duplicate(&entry, HPAGE_PMD_NR) < 0) {
+   set_pmd_at(mm, address, pvmw->pmd, pmdval);
+   return false;
+   }
+   if (list_empty(&mm->mmlist)) {
+   spin_lock(&mmlist_lock);
+   if (list_empty(&mm->mmlist))
+   list_add(&mm->mmlist, &init_mm.mmlist);
+   spin_unlock(&mmlist_lock);
+   }
+   add_mm_counter(mm, MM_ANONPAGES, -HPAGE_PMD_NR);
+   add_mm_counter(mm, MM_SWAPENTS, HPAGE_PMD_NR);
+   swp_pmd = swp_entry_to_pmd(entry);
+   if (pmd_soft_dirty(pmdval))
+   swp_pmd = pmd_swp_mksoft_dirty(swp_pmd);
+   set_pmd_at(mm, address, pvmw->pmd, swp_pmd);
+
+   page_remove_rmap(page, true);
+   put_page(page);
+   return true;
+}
 #endif
 
 static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
diff --git a/mm/rmap.c b/mm/rmap.c
index 3bb4be720bc0..a180cb1fe2db 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1413,11 +1413,52 @@ static bool try_to_unmap_one(struct page *page, struct 
vm_area_struct *vma,
continue;
}
 
+   address = pvmw.address;
+
+#ifdef CONFIG_THP_SWAP
+   /* PMD-mapped THP swap entry */
+   if (IS_ENABLED(CONFIG_THP_SWAP) &&
+   !pvmw.pte && PageAnon(page)) {
+   pmd_t pmdv

[PATCH -V5 RESEND 08/21] swap: Support to read a huge swap cluster for swapin a THP

2018-09-25 Thread Huang Ying
To swap in a THP in one piece, we need to read a huge swap cluster from
the swap device.  This patch revises __read_swap_cache_async() and
its callers and callees to support this.  If __read_swap_cache_async()
finds the swap cluster of the specified swap entry is huge, it will try
to allocate a THP and add it into the swap cache, so that later the
contents of the huge swap cluster can be read into the THP.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
Cc: Daniel Jordan 
---
 include/linux/huge_mm.h | 38 ++
 include/linux/swap.h|  4 +--
 mm/huge_memory.c| 26 --
 mm/swap_state.c | 72 -
 mm/swapfile.c   |  9 ---
 5 files changed, 99 insertions(+), 50 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 0f3e1739986f..3fdb29bc250c 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -250,6 +250,39 @@ static inline bool thp_migration_supported(void)
return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
 }
 
+/*
+ * always: directly stall for all thp allocations
+ * defer: wake kswapd and fail if not immediately available
+ * defer+madvise: wake kswapd and directly stall for MADV_HUGEPAGE, otherwise
+ *   fail if not immediately available
+ * madvise: directly stall for MADV_HUGEPAGE, otherwise fail if not immediately
+ * available
+ * never: never stall for any thp allocation
+ */
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
+{
+   bool vma_madvised;
+
+   if (!vma)
+   return GFP_TRANSHUGE_LIGHT;
+   vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
+   if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
+&transparent_hugepage_flags))
+   return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
+   if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
+&transparent_hugepage_flags))
+   return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
+   if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG,
+&transparent_hugepage_flags))
+   return GFP_TRANSHUGE_LIGHT |
+   (vma_madvised ? __GFP_DIRECT_RECLAIM :
+   __GFP_KSWAPD_RECLAIM);
+   if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
+&transparent_hugepage_flags))
+   return GFP_TRANSHUGE_LIGHT |
+   (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
+   return GFP_TRANSHUGE_LIGHT;
+}
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
 #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
@@ -363,6 +396,11 @@ static inline bool thp_migration_supported(void)
 {
return false;
 }
+
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
+{
+   return 0;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_HUGE_MM_H */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 48c159994438..f0424db46add 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -462,7 +462,7 @@ extern sector_t map_swap_page(struct page *, struct 
block_device **);
 extern sector_t swapdev_block(int, pgoff_t);
 extern int page_swapcount(struct page *);
 extern int __swap_count(swp_entry_t entry);
-extern int __swp_swapcount(swp_entry_t entry);
+extern int __swp_swapcount(swp_entry_t entry, int *entry_size);
 extern int swp_swapcount(swp_entry_t entry);
 extern struct swap_info_struct *page_swap_info(struct page *);
 extern struct swap_info_struct *swp_swap_info(swp_entry_t entry);
@@ -589,7 +589,7 @@ static inline int __swap_count(swp_entry_t entry)
return 0;
 }
 
-static inline int __swp_swapcount(swp_entry_t entry)
+static inline int __swp_swapcount(swp_entry_t entry, int *entry_size)
 {
return 0;
 }
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e5d995195fd9..4d4a447c29a8 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -620,32 +620,6 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct 
vm_fault *vmf,
 
 }
 
-/*
- * always: directly stall for all thp allocations
- * defer: wake kswapd and fail if not immediately available
- * defer+madvise: wake kswapd and directly stall for MADV_HUGEPAGE, otherwise
- *   fail if not immediately available
- * madvise: directly stall for MADV_HUGEPAGE, otherwise fail if not immediately
- * available
- * never: never stall for any thp allocation
- */
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
-{
-   const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
-
-   if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, 

[PATCH -V5 RESEND 14/21] swap: Support to move swap account for PMD swap mapping

2018-09-25 Thread Huang Ying
Previously, the huge swap cluster was split after the THP was swapped
out.  Now, to support swapping in the THP in one piece, the huge
swap cluster will not be split after the THP is reclaimed.  So in
memcg, we need to move the swap account for PMD swap mappings in the
process's page table.

When the page table is scanned while moving the memcg charge, the PMD
swap mapping will be identified, and mem_cgroup_move_swap_account()
and its callee are revised to move the account for the whole huge swap
cluster.  If the swap cluster mapped by PMD has been split, the PMD
swap mapping will be split and fall back to PTE processing.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
Cc: Daniel Jordan 
---
 include/linux/huge_mm.h |   9 
 include/linux/swap.h|   6 +++
 include/linux/swap_cgroup.h |   3 +-
 mm/huge_memory.c|   8 +--
 mm/memcontrol.c | 129 ++--
 mm/swap_cgroup.c|  45 +---
 mm/swapfile.c   |  14 +
 7 files changed, 174 insertions(+), 40 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 25ba9b5f1e60..6586c1bfac21 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -406,6 +406,9 @@ static inline gfp_t alloc_hugepage_direct_gfpmask(struct 
vm_area_struct *vma)
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #ifdef CONFIG_THP_SWAP
+extern void __split_huge_swap_pmd(struct vm_area_struct *vma,
+ unsigned long haddr,
+ pmd_t *pmd);
 extern int split_huge_swap_pmd(struct vm_area_struct *vma, pmd_t *pmd,
   unsigned long address, pmd_t orig_pmd);
 extern int do_huge_pmd_swap_page(struct vm_fault *vmf, pmd_t orig_pmd);
@@ -433,6 +436,12 @@ static inline bool transparent_hugepage_swapin_enabled(
return false;
 }
 #else /* CONFIG_THP_SWAP */
+static inline void __split_huge_swap_pmd(struct vm_area_struct *vma,
+unsigned long haddr,
+pmd_t *pmd)
+{
+}
+
 static inline int split_huge_swap_pmd(struct vm_area_struct *vma, pmd_t *pmd,
  unsigned long address, pmd_t orig_pmd)
 {
diff --git a/include/linux/swap.h b/include/linux/swap.h
index f0424db46add..74221adc4000 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -621,6 +621,7 @@ static inline swp_entry_t get_swap_page(struct page *page)
 #ifdef CONFIG_THP_SWAP
 extern int split_swap_cluster(swp_entry_t entry, unsigned long flags);
 extern int split_swap_cluster_map(swp_entry_t entry);
+extern int get_swap_entry_size(swp_entry_t entry);
 #else
 static inline int split_swap_cluster(swp_entry_t entry, unsigned long flags)
 {
@@ -631,6 +632,11 @@ static inline int split_swap_cluster_map(swp_entry_t entry)
 {
return 0;
 }
+
+static inline int get_swap_entry_size(swp_entry_t entry)
+{
+   return 1;
+}
 #endif
 
 #ifdef CONFIG_MEMCG
diff --git a/include/linux/swap_cgroup.h b/include/linux/swap_cgroup.h
index a12dd1c3966c..c40fb52b0563 100644
--- a/include/linux/swap_cgroup.h
+++ b/include/linux/swap_cgroup.h
@@ -7,7 +7,8 @@
 #ifdef CONFIG_MEMCG_SWAP
 
 extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
-   unsigned short old, unsigned short new);
+   unsigned short old, unsigned short new,
+   unsigned int nr_ents);
 extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id,
 unsigned int nr_ents);
 extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index aee8614e99f7..35c7243720bc 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1631,10 +1631,11 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, 
pmd_t pmd)
return 0;
 }
 
+#ifdef CONFIG_THP_SWAP
 /* Convert a PMD swap mapping to a set of PTE swap mappings */
-static void __split_huge_swap_pmd(struct vm_area_struct *vma,
- unsigned long haddr,
- pmd_t *pmd)
+void __split_huge_swap_pmd(struct vm_area_struct *vma,
+  unsigned long haddr,
+  pmd_t *pmd)
 {
struct mm_struct *mm = vma->vm_mm;
pgtable_t pgtable;
@@ -1665,7 +1666,6 @@ static void __split_huge_swap_pmd(struct vm_area_struct 
*vma,
pmd_populate(mm, pmd, pgtable);
 }
 
-#ifdef CONFIG_THP_SWAP
 int split_huge_swap_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long address, pmd_t orig_pmd)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0da010a4b3bf..28a8b50c64da 100644
--

[PATCH -V5 RESEND 03/21] swap: Support PMD swap mapping in swap_duplicate()

2018-09-25 Thread Huang Ying
To support swapping in the THP in one piece, we need to create a PMD
swap mapping during swapout, and maintain the PMD swap mapping count.
This patch implements the support to increase the PMD swap mapping
count (for swapout, fork, etc.) and set the SWAP_HAS_CACHE flag (for
swapin, etc.) for a huge swap cluster in the swap_duplicate() function
family.  Although it only implements a part of the design of the swap
reference count with PMD swap mapping, the whole design is described
as follows to make it easy to understand the patch and the whole
picture.

A huge swap cluster is used to hold the contents of a swapped-out THP.
After swapout, a PMD page mapping to the THP will become a PMD
swap mapping to the huge swap cluster via a swap entry in PMD.  While
a PTE page mapping to a subpage of the THP will become the PTE swap
mapping to a swap slot in the huge swap cluster via a swap entry in
PTE.

If there is no PMD swap mapping and the corresponding THP is removed
from the page cache (reclaimed), the huge swap cluster will be split
and become a normal swap cluster.

The count (cluster_count()) of the huge swap cluster is
SWAPFILE_CLUSTER (= HPAGE_PMD_NR) + PMD swap mapping count.  Because
all swap slots in the huge swap cluster are mapped by PTE or PMD, or
has SWAP_HAS_CACHE bit set, the usage count of the swap cluster is
HPAGE_PMD_NR.  And the PMD swap mapping count is recorded too to make
it easy to determine whether there are remaining PMD swap mappings.

The count in swap_map[offset] is the sum of the PTE and PMD swap
mapping counts.  This means when we increase the PMD swap mapping
count, we need to increase swap_map[offset] for all swap slots inside
the swap cluster.  An alternative choice is to make swap_map[offset]
record the PTE swap mapping count only, given we have recorded the PMD
swap mapping count in the count of the huge swap cluster.  But this
needs to increase swap_map[offset] when splitting the PMD swap
mapping, which may fail because of memory allocation for swap count
continuation.  That is hard to deal with.  So we choose the current
solution.

The PMD swap mapping to a huge swap cluster may be split when unmapping
a part of the PMD mapping, etc.  That is easy because only the count of
the huge swap cluster needs to be changed.  When the last PMD swap
mapping is gone and SWAP_HAS_CACHE is unset, we will split the huge
swap cluster (clear the huge flag).  This makes it easy to reason about
the cluster state.

A huge swap cluster will be split when splitting the THP in swap
cache, or failing to allocate THP during swapin, etc.  But when
splitting the huge swap cluster, we will not try to split all PMD swap
mappings, because sometimes we don't have enough information available
for that.  Later, when the PMD swap mapping is duplicated or swapped
in, etc., the PMD swap mapping will be split and fall back to the PTE
operation.

When a THP is added into swap cache, the SWAP_HAS_CACHE flag will be
set in the swap_map[offset] of all swap slots inside the huge swap
cluster backing the THP.  This huge swap cluster will not be split
unless the THP is split even if its PMD swap mapping count dropped to
0.  Later, when the THP is removed from swap cache, the SWAP_HAS_CACHE
flag will be cleared in the swap_map[offset] of all swap slots inside
the huge swap cluster.  And this huge swap cluster will be split if
its PMD swap mapping count is 0.

The first parameter of swap_duplicate() is changed to return the swap
entry to call add_swap_count_continuation() for, because we may need
to call it for a swap entry in the middle of a huge swap cluster.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
Cc: Daniel Jordan 
---
 include/linux/swap.h |   9 +++--
 mm/memory.c  |   2 +-
 mm/rmap.c|   2 +-
 mm/swap_state.c  |   2 +-
 mm/swapfile.c| 107 ++-
 5 files changed, 97 insertions(+), 25 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index f32f94639b13..3149cdb52e6d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -451,8 +451,8 @@ extern swp_entry_t get_swap_page_of_type(int);
 extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size);
 extern int add_swap_count_continuation(swp_entry_t, gfp_t);
 extern void swap_shmem_alloc(swp_entry_t);
-extern int swap_duplicate(swp_entry_t);
-extern int swapcache_prepare(swp_entry_t);
+extern int swap_duplicate(swp_entry_t *entry, int entry_size);
+extern int swapcache_prepare(swp_entry_t entry, int entry_size);
 extern void swap_free(swp_entry_t);
 extern void swapcache_free_entries(swp_entry_t *entries, int n);
 extern int free_swap_and_cache(swp_entry_t);
@@ -510,7 +510,8 @@ static inline void show_swap_cache_info(void)
 }
 
 #define free_swap_and_cache(e) ({(is_migration_entry(e) || 
is_device_private_entry(e));})

[PATCH -V5 RESEND 15/21] swap: Support to copy PMD swap mapping when fork()

2018-09-25 Thread Huang Ying
During fork, the page table needs to be copied from parent to child.  A
PMD swap mapping needs to be copied too, and the swap reference count
needs to be increased.

When the huge swap cluster has been split already, we need to split
the PMD swap mapping and fall back to PTE copying.

When swap count continuation fails to allocate a page with
GFP_ATOMIC, we need to unlock the spinlocks and try again with
GFP_KERNEL.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
Cc: Daniel Jordan 
---
 mm/huge_memory.c | 72 
 1 file changed, 57 insertions(+), 15 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 35c7243720bc..c569e5e8ee17 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -941,6 +941,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct 
mm_struct *src_mm,
if (unlikely(!pgtable))
goto out;
 
+retry:
dst_ptl = pmd_lock(dst_mm, dst_pmd);
src_ptl = pmd_lockptr(src_mm, src_pmd);
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
@@ -948,26 +949,67 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct 
mm_struct *src_mm,
ret = -EAGAIN;
pmd = *src_pmd;
 
-#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
if (unlikely(is_swap_pmd(pmd))) {
swp_entry_t entry = pmd_to_swp_entry(pmd);
 
-   VM_BUG_ON(!is_pmd_migration_entry(pmd));
-   if (is_write_migration_entry(entry)) {
-   make_migration_entry_read(&entry);
-   pmd = swp_entry_to_pmd(entry);
-   if (pmd_swp_soft_dirty(*src_pmd))
-   pmd = pmd_swp_mksoft_dirty(pmd);
-   set_pmd_at(src_mm, addr, src_pmd, pmd);
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+   if (is_migration_entry(entry)) {
+   if (is_write_migration_entry(entry)) {
+   make_migration_entry_read(&entry);
+   pmd = swp_entry_to_pmd(entry);
+   if (pmd_swp_soft_dirty(*src_pmd))
+   pmd = pmd_swp_mksoft_dirty(pmd);
+   set_pmd_at(src_mm, addr, src_pmd, pmd);
+   }
+   add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+   mm_inc_nr_ptes(dst_mm);
+   pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
+   set_pmd_at(dst_mm, addr, dst_pmd, pmd);
+   ret = 0;
+   goto out_unlock;
}
-   add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
-   mm_inc_nr_ptes(dst_mm);
-   pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
-   set_pmd_at(dst_mm, addr, dst_pmd, pmd);
-   ret = 0;
-   goto out_unlock;
-   }
 #endif
+   if (IS_ENABLED(CONFIG_THP_SWAP) && !non_swap_entry(entry)) {
+   ret = swap_duplicate(&entry, HPAGE_PMD_NR);
+   if (!ret) {
+   add_mm_counter(dst_mm, MM_SWAPENTS,
+  HPAGE_PMD_NR);
+   mm_inc_nr_ptes(dst_mm);
+   pgtable_trans_huge_deposit(dst_mm, dst_pmd,
+  pgtable);
+   set_pmd_at(dst_mm, addr, dst_pmd, pmd);
+   /* make sure dst_mm is on swapoff's mmlist. */
+   if (unlikely(list_empty(&dst_mm->mmlist))) {
+   spin_lock(&mmlist_lock);
+   if (list_empty(&dst_mm->mmlist))
+   list_add(&dst_mm->mmlist,
+&src_mm->mmlist);
+   spin_unlock(&mmlist_lock);
+   }
+   } else if (ret == -ENOTDIR) {
+   /*
+* The huge swap cluster has been split, split
+* the PMD swap mapping and fallback to PTE
+*/
+   __split_huge_swap_pmd(vma, addr, src_pmd);
+   pte_free(dst_mm, pgtable);
+   } else if (ret == -ENOMEM) {
+   spin_unlock(src_ptl);
+   spin_unlock(dst_ptl);
+   ret = add_swap_count_continuation(entry,
+ GFP_KERNEL);
+ 

[PATCH -V5 RESEND 16/21] swap: Free PMD swap mapping when zap_huge_pmd()

2018-09-25 Thread Huang Ying
For a PMD swap mapping, zap_huge_pmd() will clear the PMD and call
free_swap_and_cache() to decrease the swap reference count and maybe
free or split the huge swap cluster and the THP in swap cache.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
Cc: Daniel Jordan 
---
 mm/huge_memory.c | 32 +---
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c569e5e8ee17..accbd54d0ed4 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2019,7 +2019,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct 
vm_area_struct *vma,
spin_unlock(ptl);
if (is_huge_zero_pmd(orig_pmd))
tlb_remove_page_size(tlb, pmd_page(orig_pmd), 
HPAGE_PMD_SIZE);
-   } else if (is_huge_zero_pmd(orig_pmd)) {
+   } else if (pmd_present(orig_pmd) && is_huge_zero_pmd(orig_pmd)) {
zap_deposited_table(tlb->mm, pmd);
spin_unlock(ptl);
tlb_remove_page_size(tlb, pmd_page(orig_pmd), HPAGE_PMD_SIZE);
@@ -2032,17 +2032,27 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct 
vm_area_struct *vma,
page_remove_rmap(page, true);
VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
VM_BUG_ON_PAGE(!PageHead(page), page);
-   } else if (thp_migration_supported()) {
-   swp_entry_t entry;
-
-   VM_BUG_ON(!is_pmd_migration_entry(orig_pmd));
-   entry = pmd_to_swp_entry(orig_pmd);
-   page = pfn_to_page(swp_offset(entry));
+   } else {
+   swp_entry_t entry = pmd_to_swp_entry(orig_pmd);
+
+   if (thp_migration_supported() &&
+   is_migration_entry(entry))
+   page = pfn_to_page(swp_offset(entry));
+   else if (IS_ENABLED(CONFIG_THP_SWAP) &&
+!non_swap_entry(entry))
+   free_swap_and_cache(entry, HPAGE_PMD_NR);
+   else {
+   WARN_ONCE(1,
+"Non present huge pmd without pmd migration or swap enabled!");
+   goto unlock;
+   }
flush_needed = 0;
-   } else
-   WARN_ONCE(1, "Non present huge pmd without pmd 
migration enabled!");
+   }
 
-   if (PageAnon(page)) {
+   if (!page) {
+   zap_deposited_table(tlb->mm, pmd);
+   add_mm_counter(tlb->mm, MM_SWAPENTS, -HPAGE_PMD_NR);
+   } else if (PageAnon(page)) {
zap_deposited_table(tlb->mm, pmd);
add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
} else {
@@ -2050,7 +2060,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct 
vm_area_struct *vma,
zap_deposited_table(tlb->mm, pmd);
add_mm_counter(tlb->mm, mm_counter_file(page), 
-HPAGE_PMD_NR);
}
-
+unlock:
spin_unlock(ptl);
if (flush_needed)
tlb_remove_page_size(tlb, page, HPAGE_PMD_SIZE);
-- 
2.16.4



[PATCH -V5 RESEND 07/21] swap: Support PMD swap mapping in split_swap_cluster()

2018-09-25 Thread Huang Ying
When splitting a THP in swap cache or failing to allocate a THP when
swapping in a huge swap cluster, the huge swap cluster will be split.
In addition to clearing the huge flag of the swap cluster, the PMD swap
mapping count recorded in cluster_count() will be set to 0.  But we
will not touch the PMD swap mappings themselves, because it is
sometimes hard to find them all.  When the PMD swap mappings are
operated on later, it will be found that the huge swap cluster has been
split and the PMD swap mappings will be split at that time.

Unless splitting a THP in swap cache (specified via the
SSC_SPLIT_CACHED flag), split_swap_cluster() will return -EEXIST if the
SWAP_HAS_CACHE flag is set in swap_map[offset], because this indicates
that there is a THP corresponding to this huge swap cluster, and it
isn't desirable to split the THP.

When splitting a THP in swap cache, the call to split_swap_cluster()
is moved to before the sub-pages are unlocked, so that all sub-pages
are kept locked from the time the THP is split until the huge swap
cluster is split.  This makes the code much easier to reason about.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
Cc: Daniel Jordan 
---
 include/linux/swap.h |  6 --
 mm/huge_memory.c | 18 ++--
 mm/swapfile.c| 58 +---
 3 files changed, 57 insertions(+), 25 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 5bd54b6fd4a1..48c159994438 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -616,11 +616,13 @@ static inline swp_entry_t get_swap_page(struct page *page)
 
 #endif /* CONFIG_SWAP */
 
+#define SSC_SPLIT_CACHED   0x1
+
 #ifdef CONFIG_THP_SWAP
-extern int split_swap_cluster(swp_entry_t entry);
+extern int split_swap_cluster(swp_entry_t entry, unsigned long flags);
 extern int split_swap_cluster_map(swp_entry_t entry);
 #else
-static inline int split_swap_cluster(swp_entry_t entry)
+static inline int split_swap_cluster(swp_entry_t entry, unsigned long flags)
 {
return 0;
 }
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3ea7318fcdcd..e5d995195fd9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2497,6 +2497,17 @@ static void __split_huge_page(struct page *page, struct 
list_head *list,
 
unfreeze_page(head);
 
+   /*
+* Split swap cluster before unlocking sub-pages.  So all
+* sub-pages will be kept locked from THP has been split to
+* swap cluster is split.
+*/
+   if (PageSwapCache(head)) {
+   swp_entry_t entry = { .val = page_private(head) };
+
+   split_swap_cluster(entry, SSC_SPLIT_CACHED);
+   }
+
for (i = 0; i < HPAGE_PMD_NR; i++) {
struct page *subpage = head + i;
if (subpage == page)
@@ -2723,12 +2734,7 @@ int split_huge_page_to_list(struct page *page, struct 
list_head *list)
__dec_node_page_state(page, NR_SHMEM_THPS);
spin_unlock(&pgdata->split_queue_lock);
__split_huge_page(page, list, flags);
-   if (PageSwapCache(head)) {
-   swp_entry_t entry = { .val = page_private(head) };
-
-   ret = split_swap_cluster(entry);
-   } else
-   ret = 0;
+   ret = 0;
} else {
if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
pr_alert("total_mapcount: %u, page_count(): %u\n",
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 16723b9d971a..ef2b42c199c0 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1469,23 +1469,6 @@ void put_swap_page(struct page *page, swp_entry_t entry)
unlock_cluster_or_swap_info(si, ci);
 }
 
-#ifdef CONFIG_THP_SWAP
-int split_swap_cluster(swp_entry_t entry)
-{
-   struct swap_info_struct *si;
-   struct swap_cluster_info *ci;
-   unsigned long offset = swp_offset(entry);
-
-   si = _swap_info_get(entry);
-   if (!si)
-   return -EBUSY;
-   ci = lock_cluster(si, offset);
-   cluster_clear_huge(ci);
-   unlock_cluster(ci);
-   return 0;
-}
-#endif
-
 static int swp_entry_cmp(const void *ent1, const void *ent2)
 {
const swp_entry_t *e1 = ent1, *e2 = ent2;
@@ -4064,6 +4047,47 @@ int split_swap_cluster_map(swp_entry_t entry)
unlock_cluster(ci);
return 0;
 }
+
+/*
+ * We will not try to split all PMD swap mappings to the swap cluster,
+ * because we haven't enough information available for that.  Later,
+ * when the PMD swap mapping is duplicated or swapin, etc, the PMD
+ * swap mapping will be split and fallback to the PTE operations.
+ */
+int split_swap_cluster(swp_entry_t entry, unsigned long flags)
+{
+   struct swap_info_struct *si;
+   struct swap_cluster_info *ci

[PATCH -V5 RESEND 12/21] swap: Support PMD swap mapping in swapoff

2018-09-25 Thread Huang Ying
During swapoff, for a huge swap cluster, we need to allocate a THP,
read its contents into the THP and unuse the PMD and PTE swap mappings
to it.  If we fail to allocate a THP, the huge swap cluster will be
split.

During unuse, if it is found that the swap cluster mapped by a PMD
swap mapping is split already, we will split the PMD swap mapping and
unuse the PTEs.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
Cc: Daniel Jordan 
---
 include/asm-generic/pgtable.h | 14 +--
 include/linux/huge_mm.h   |  8 
 mm/huge_memory.c  |  4 +-
 mm/swapfile.c | 86 ++-
 4 files changed, 97 insertions(+), 15 deletions(-)

diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index eb1e9d17371b..d64cef2bff04 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -931,22 +931,12 @@ static inline int 
pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
barrier();
 #endif
/*
-* !pmd_present() checks for pmd migration entries
-*
-* The complete check uses is_pmd_migration_entry() in linux/swapops.h
-* But using that requires moving current function and 
pmd_trans_unstable()
-* to linux/swapops.h to resovle dependency, which is too much code 
move.
-*
-* !pmd_present() is equivalent to is_pmd_migration_entry() currently,
-* because !pmd_present() pages can only be under migration not swapped
-* out.
-*
-* pmd_none() is preseved for future condition checks on pmd migration
+* pmd_none() is preseved for future condition checks on pmd swap
 * entries and not confusing with this function name, although it is
 * redundant with !pmd_present().
 */
if (pmd_none(pmdval) || pmd_trans_huge(pmdval) ||
-   (IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION) && 
!pmd_present(pmdval)))
+   (IS_ENABLED(CONFIG_HAVE_PMD_SWAP_ENTRY) && !pmd_present(pmdval)))
return 1;
if (unlikely(pmd_bad(pmdval))) {
pmd_clear_bad(pmd);
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 9dedff974def..25ba9b5f1e60 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -406,6 +406,8 @@ static inline gfp_t alloc_hugepage_direct_gfpmask(struct 
vm_area_struct *vma)
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #ifdef CONFIG_THP_SWAP
+extern int split_huge_swap_pmd(struct vm_area_struct *vma, pmd_t *pmd,
+  unsigned long address, pmd_t orig_pmd);
 extern int do_huge_pmd_swap_page(struct vm_fault *vmf, pmd_t orig_pmd);
 
 static inline bool transparent_hugepage_swapin_enabled(
@@ -431,6 +433,12 @@ static inline bool transparent_hugepage_swapin_enabled(
return false;
 }
 #else /* CONFIG_THP_SWAP */
+static inline int split_huge_swap_pmd(struct vm_area_struct *vma, pmd_t *pmd,
+ unsigned long address, pmd_t orig_pmd)
+{
+   return 0;
+}
+
 static inline int do_huge_pmd_swap_page(struct vm_fault *vmf, pmd_t orig_pmd)
 {
return 0;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4d41ce83e3b9..18da840bd049 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1666,8 +1666,8 @@ static void __split_huge_swap_pmd(struct vm_area_struct 
*vma,
 }
 
 #ifdef CONFIG_THP_SWAP
-static int split_huge_swap_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-  unsigned long address, pmd_t orig_pmd)
+int split_huge_swap_pmd(struct vm_area_struct *vma, pmd_t *pmd,
+   unsigned long address, pmd_t orig_pmd)
 {
struct mm_struct *mm = vma->vm_mm;
spinlock_t *ptl;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 3fe50f1da0a0..64067ee6a09c 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1931,6 +1931,11 @@ static inline int pte_same_as_swp(pte_t pte, pte_t 
swp_pte)
return pte_same(pte_swp_clear_soft_dirty(pte), swp_pte);
 }
 
+static inline int pmd_same_as_swp(pmd_t pmd, pmd_t swp_pmd)
+{
+   return pmd_same(pmd_swp_clear_soft_dirty(pmd), swp_pmd);
+}
+
 /*
  * No need to decide whether this PTE shares the swap entry with others,
  * just let do_wp_page work it out if a write is requested later - to
@@ -1992,6 +1997,53 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t 
*pmd,
return ret;
 }
 
+#ifdef CONFIG_THP_SWAP
+static int unuse_pmd(struct vm_area_struct *vma, pmd_t *pmd,
+unsigned long addr, swp_entry_t entry, struct page *page)
+{
+   struct mem_cgroup *memcg;
+   spinlock_t *ptl;
+   int ret = 1;
+
+   if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL,
+ &memcg, true)) {
+   ret = -ENOMEM;
+

Re: [PATCH v2 2/2] locking/pvqspinlock, hv: Enable PV qspinlock for Hyper-V

2018-09-25 Thread Yi Sun
On 18-09-21 17:02:54, Michael Kelley (EOSG) wrote:
> From: Yi Sun  Sent: Friday, September 21, 2018 
> 12:25 AM
> > +
> > +#define pr_fmt(fmt) "hv: " fmt
> 
> Other Hyper-V messages use "Hyper-V: " as the prefix, not "hv: ".  Take
> a quick look at 'dmesg' output for reference.
> 
Will modify this. Thanks!

> > +
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> 
> Some of these #includes look like they might be leftovers from
> some other code.  Please check and see whether kernel_stat.h,
> debugsfs.h, log2.h, and gfp.h are actually needed.
> 
Sure, I will check them.

> > +static void hv_qlock_wait(u8 *byte, u8 val)
> > +{
> > +   unsigned long msr_val;
> > +
> > +   if (READ_ONCE(*byte) != val)
> > +   return;
> > +
> > +   /*
> > +* Read HV_X64_MSR_GUEST_IDLE MSR can trigger the guest's
> > +* transition to the idle power state which can be exited
> > +* by an IPI even if IF flag is disabled.
> > +*/
> > +   if (ms_hyperv.features & HV_X64_MSR_GUEST_IDLE_AVAILABLE)
> 
> I can't see a case where this test is actually needed.  hv_qlock_wait()
> can only get called if the flag is set when hv_init_spinlocks() is run, and
> the flag value doesn't change after it is set.
> 
Yes, it is redundant. Will remove it.

> > +   rdmsrl(HV_X64_MSR_GUEST_IDLE, msr_val);
> > +}
> 
> Michael


[PATCH -V5 RESEND 21/21] swap: Update help of CONFIG_THP_SWAP

2018-09-25 Thread Huang Ying
The help of CONFIG_THP_SWAP is updated to reflect the latest progress
of THP (Transparent Huge Page) swap optimization.

Signed-off-by: "Huang, Ying" 
Reviewed-by: Dan Williams 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
Cc: Daniel Jordan 
---
 mm/Kconfig | 2 --
 1 file changed, 2 deletions(-)

diff --git a/mm/Kconfig b/mm/Kconfig
index b7f7fb145d0f..061d4e824506 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -419,8 +419,6 @@ config THP_SWAP
depends on TRANSPARENT_HUGEPAGE && ARCH_WANTS_THP_SWAP && SWAP
help
  Swap transparent huge pages in one piece, without splitting.
- XXX: For now, swap cluster backing transparent huge page
- will be split after swapout.
 
  For selection by architectures with reasonable THP sizes.
 
-- 
2.16.4



[PATCH -V5 RESEND 11/21] swap: Add sysfs interface to configure THP swapin

2018-09-25 Thread Huang Ying
Swapping in a THP as a whole isn't desirable in some situations.  For
example, for a completely random access pattern, swapping in a THP in
one piece will inflate the amount of data read greatly.  So a sysfs
interface, /sys/kernel/mm/transparent_hugepage/swapin_enabled, is added
to configure it.  The following three options are provided:

- always: THP swapin will be enabled always

- madvise: THP swapin will be enabled only for VMA with VM_HUGEPAGE
  flag set.

- never: THP swapin will be disabled always

The default configuration is: madvise.

During page fault, if a PMD swap mapping is found and THP swapin is
disabled, the huge swap cluster and the PMD swap mapping will be split
and we fall back to normal page swapin.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
Cc: Daniel Jordan 
---
 Documentation/admin-guide/mm/transhuge.rst | 21 +++
 include/linux/huge_mm.h| 31 ++
 mm/huge_memory.c   | 94 --
 3 files changed, 127 insertions(+), 19 deletions(-)

diff --git a/Documentation/admin-guide/mm/transhuge.rst 
b/Documentation/admin-guide/mm/transhuge.rst
index 85e33f785fd7..23aefb17101c 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -160,6 +160,27 @@ Some userspace (such as a test program, or an optimized 
memory allocation
 
cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
 
+Transparent hugepage may be swapout and swapin in one piece without
+splitting.  This will improve the utility of transparent hugepage but
+may inflate the read/write too.  So whether to enable swapin
+transparent hugepage in one piece can be configured as follow.
+
+   echo always >/sys/kernel/mm/transparent_hugepage/swapin_enabled
+   echo madvise >/sys/kernel/mm/transparent_hugepage/swapin_enabled
+   echo never >/sys/kernel/mm/transparent_hugepage/swapin_enabled
+
+always
+   Attempt to allocate a transparent huge page and read it from
+   swap space in one piece every time.
+
+never
+   Always split the swap space and PMD swap mapping and swapin
+   the fault normal page during swapin.
+
+madvise
+   Only swapin the transparent huge page in one piece for
+   MADV_HUGEPAGE madvise regions.
+
 khugepaged will be automatically started when
 transparent_hugepage/enabled is set to "always" or "madvise, and it'll
 be automatically shutdown if it's set to "never".
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index c2b8ced6fc2b..9dedff974def 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -63,6 +63,8 @@ enum transparent_hugepage_flag {
 #ifdef CONFIG_DEBUG_VM
TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG,
 #endif
+   TRANSPARENT_HUGEPAGE_SWAPIN_FLAG,
+   TRANSPARENT_HUGEPAGE_SWAPIN_REQ_MADV_FLAG,
 };
 
 struct kobject;
@@ -405,11 +407,40 @@ static inline gfp_t alloc_hugepage_direct_gfpmask(struct 
vm_area_struct *vma)
 
 #ifdef CONFIG_THP_SWAP
 extern int do_huge_pmd_swap_page(struct vm_fault *vmf, pmd_t orig_pmd);
+
+static inline bool transparent_hugepage_swapin_enabled(
+   struct vm_area_struct *vma)
+{
+   if (vma->vm_flags & VM_NOHUGEPAGE)
+   return false;
+
+   if (is_vma_temporary_stack(vma))
+   return false;
+
+   if (test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
+   return false;
+
+   if (transparent_hugepage_flags &
+   (1 << TRANSPARENT_HUGEPAGE_SWAPIN_FLAG))
+   return true;
+
+   if (transparent_hugepage_flags &
+   (1 << TRANSPARENT_HUGEPAGE_SWAPIN_REQ_MADV_FLAG))
+   return !!(vma->vm_flags & VM_HUGEPAGE);
+
+   return false;
+}
 #else /* CONFIG_THP_SWAP */
 static inline int do_huge_pmd_swap_page(struct vm_fault *vmf, pmd_t orig_pmd)
 {
return 0;
 }
+
+static inline bool transparent_hugepage_swapin_enabled(
+   struct vm_area_struct *vma)
+{
+   return false;
+}
 #endif /* CONFIG_THP_SWAP */
 
 #endif /* _LINUX_HUGE_MM_H */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 6f8676d6cba0..4d41ce83e3b9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -57,7 +57,8 @@ unsigned long transparent_hugepage_flags __read_mostly =
 #endif
(1

[PATCH -V5 RESEND 10/21] swap: Support to count THP swapin and its fallback

2018-09-25 Thread Huang Ying
Two new /proc/vmstat fields, "thp_swpin" and "thp_swpin_fallback", are
added to count swapping in a THP from the swap device in one piece and
falling back to normal page swapin, respectively.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
Cc: Daniel Jordan 
---
 Documentation/admin-guide/mm/transhuge.rst |  8 
 include/linux/vm_event_item.h  |  2 ++
 mm/huge_memory.c   |  4 +++-
 mm/page_io.c   | 15 ---
 mm/vmstat.c|  2 ++
 5 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/Documentation/admin-guide/mm/transhuge.rst 
b/Documentation/admin-guide/mm/transhuge.rst
index 7ab93a8404b9..85e33f785fd7 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -364,6 +364,14 @@ thp_swpout_fallback
Usually because failed to allocate some continuous swap space
for the huge page.
 
+thp_swpin
+   is incremented every time a huge page is swapin in one piece
+   without splitting.
+
+thp_swpin_fallback
+   is incremented if a huge page has to be split during swapin.
+   Usually because failed to allocate a huge page.
+
 As the system ages, allocating huge pages may be expensive as the
 system uses memory compaction to copy data around memory to free a
 huge page for use. There are some counters in ``/proc/vmstat`` to help
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 47a3441cf4c4..c20b655cfdcc 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -88,6 +88,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
THP_ZERO_PAGE_ALLOC_FAILED,
THP_SWPOUT,
THP_SWPOUT_FALLBACK,
+   THP_SWPIN,
+   THP_SWPIN_FALLBACK,
 #endif
 #ifdef CONFIG_MEMORY_BALLOON
BALLOON_INFLATE,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 747879cd0e90..6f8676d6cba0 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1668,8 +1668,10 @@ int do_huge_pmd_swap_page(struct vm_fault *vmf, pmd_t 
orig_pmd)
/* swapoff occurs under us */
} else if (ret == -EINVAL)
ret = 0;
-   else
+   else {
+   count_vm_event(THP_SWPIN_FALLBACK);
goto fallback;
+   }
}
delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
goto out;
diff --git a/mm/page_io.c b/mm/page_io.c
index aafd19ec1db4..362254b99955 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -348,6 +348,15 @@ int __swap_writepage(struct page *page, struct 
writeback_control *wbc,
return ret;
 }
 
+static inline void count_swpin_vm_event(struct page *page)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+   if (unlikely(PageTransHuge(page)))
+   count_vm_event(THP_SWPIN);
+#endif
+   count_vm_events(PSWPIN, hpage_nr_pages(page));
+}
+
 int swap_readpage(struct page *page, bool synchronous)
 {
struct bio *bio;
@@ -371,7 +380,7 @@ int swap_readpage(struct page *page, bool synchronous)
 
ret = mapping->a_ops->readpage(swap_file, page);
if (!ret)
-   count_vm_event(PSWPIN);
+   count_swpin_vm_event(page);
return ret;
}
 
@@ -382,7 +391,7 @@ int swap_readpage(struct page *page, bool synchronous)
unlock_page(page);
}
 
-   count_vm_event(PSWPIN);
+   count_swpin_vm_event(page);
return 0;
}
 
@@ -401,7 +410,7 @@ int swap_readpage(struct page *page, bool synchronous)
get_task_struct(current);
bio->bi_private = current;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
-   count_vm_event(PSWPIN);
+   count_swpin_vm_event(page);
bio_get(bio);
qc = submit_bio(bio);
while (synchronous) {
diff --git a/mm/vmstat.c b/mm/vmstat.c
index e3fb28d2c923..b954131d9e28 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1264,6 +1264,8 @@ const char * const vmstat_text[] = {
"thp_zero_page_alloc_failed",
"thp_swpout",
"thp_swpout_fallback",
+   "thp_swpin",
+   "thp_swpin_fallback",
 #endif
 #ifdef CONFIG_MEMORY_BALLOON
"balloon_inflate",
-- 
2.16.4



[PATCH -V5 RESEND 13/21] swap: Support PMD swap mapping in madvise_free()

2018-09-25 Thread Huang Ying
When madvise_free() finds a PMD swap mapping, if only part of the huge
swap cluster is operated on, the PMD swap mapping will be split and we
fall back to PTE swap mapping processing.  Otherwise, if the whole huge
swap cluster is operated on, free_swap_and_cache() will be called to
decrease the PMD swap mapping count and probably free the swap space
and the THP in swap cache too.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
Cc: Daniel Jordan 
---
 mm/huge_memory.c | 54 +++---
 mm/madvise.c |  2 +-
 2 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 18da840bd049..aee8614e99f7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1844,6 +1844,15 @@ int do_huge_pmd_swap_page(struct vm_fault *vmf, pmd_t 
orig_pmd)
 }
 #endif
 
+static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
+{
+   pgtable_t pgtable;
+
+   pgtable = pgtable_trans_huge_withdraw(mm, pmd);
+   pte_free(mm, pgtable);
+   mm_dec_nr_ptes(mm);
+}
+
 /*
  * Return true if we do MADV_FREE successfully on entire pmd page.
  * Otherwise, return false.
@@ -1864,15 +1873,39 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, 
struct vm_area_struct *vma,
goto out_unlocked;
 
orig_pmd = *pmd;
-   if (is_huge_zero_pmd(orig_pmd))
-   goto out;
-
if (unlikely(!pmd_present(orig_pmd))) {
-   VM_BUG_ON(thp_migration_supported() &&
- !is_pmd_migration_entry(orig_pmd));
-   goto out;
+   swp_entry_t entry = pmd_to_swp_entry(orig_pmd);
+
+   if (is_migration_entry(entry)) {
+   VM_BUG_ON(!thp_migration_supported());
+   goto out;
+   } else if (IS_ENABLED(CONFIG_THP_SWAP) &&
+  !non_swap_entry(entry)) {
+   /*
+* If part of THP is discarded, split the PMD
+* swap mapping and operate on the PTEs
+*/
+   if (next - addr != HPAGE_PMD_SIZE) {
+   unsigned long haddr = addr & HPAGE_PMD_MASK;
+
+   __split_huge_swap_pmd(vma, haddr, pmd);
+   goto out;
+   }
+   free_swap_and_cache(entry, HPAGE_PMD_NR);
+   pmd_clear(pmd);
+   zap_deposited_table(mm, pmd);
+   if (current->mm == mm)
+   sync_mm_rss(mm);
+   add_mm_counter(mm, MM_SWAPENTS, -HPAGE_PMD_NR);
+   ret = true;
+   goto out;
+   } else
+   VM_BUG_ON(1);
}
 
+   if (is_huge_zero_pmd(orig_pmd))
+   goto out;
+
page = pmd_page(orig_pmd);
/*
 * If other processes are mapping this page, we couldn't discard
@@ -1918,15 +1951,6 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, 
struct vm_area_struct *vma,
return ret;
 }
 
-static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
-{
-   pgtable_t pgtable;
-
-   pgtable = pgtable_trans_huge_withdraw(mm, pmd);
-   pte_free(mm, pgtable);
-   mm_dec_nr_ptes(mm);
-}
-
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 pmd_t *pmd, unsigned long addr)
 {
diff --git a/mm/madvise.c b/mm/madvise.c
index 6fff1c1d2009..07ef599d4255 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -321,7 +321,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long 
addr,
unsigned long next;
 
next = pmd_addr_end(addr, end);
-   if (pmd_trans_huge(*pmd))
+   if (pmd_trans_huge(*pmd) || is_swap_pmd(*pmd))
if (madvise_free_huge_pmd(tlb, vma, pmd, addr, next))
goto next;
 
-- 
2.16.4



[PATCH -V5 RESEND 09/21] swap: Swapin a THP in one piece

2018-09-25 Thread Huang Ying
With this patch, when the page fault handler finds a PMD swap mapping,
it will swap in a THP in one piece.  This avoids the overhead of
splitting/collapsing before/after the THP swapping, and improves the
swap performance greatly thanks to the reduced page fault count etc.

do_huge_pmd_swap_page() is added in the patch to implement this.  It
is similar to do_swap_page() for normal page swapin.

If failing to allocate a THP, the huge swap cluster and the PMD swap
mapping will be split to fallback to normal page swapin.

If the huge swap cluster has been split already, the PMD swap mapping
will be split to fallback to normal page swapin.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
Cc: Daniel Jordan 
---
 include/linux/huge_mm.h |   9 +++
 mm/huge_memory.c| 174 
 mm/memory.c |  16 +++--
 3 files changed, 193 insertions(+), 6 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 3fdb29bc250c..c2b8ced6fc2b 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -403,4 +403,13 @@ static inline gfp_t alloc_hugepage_direct_gfpmask(struct 
vm_area_struct *vma)
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+#ifdef CONFIG_THP_SWAP
+extern int do_huge_pmd_swap_page(struct vm_fault *vmf, pmd_t orig_pmd);
+#else /* CONFIG_THP_SWAP */
+static inline int do_huge_pmd_swap_page(struct vm_fault *vmf, pmd_t orig_pmd)
+{
+   return 0;
+}
+#endif /* CONFIG_THP_SWAP */
+
 #endif /* _LINUX_HUGE_MM_H */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4d4a447c29a8..747879cd0e90 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -33,6 +33,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 #include 
@@ -1612,6 +1614,178 @@ static void __split_huge_swap_pmd(struct vm_area_struct 
*vma,
pmd_populate(mm, pmd, pgtable);
 }
 
+#ifdef CONFIG_THP_SWAP
+static int split_huge_swap_pmd(struct vm_area_struct *vma, pmd_t *pmd,
+  unsigned long address, pmd_t orig_pmd)
+{
+   struct mm_struct *mm = vma->vm_mm;
+   spinlock_t *ptl;
+   int ret = 0;
+
+   ptl = pmd_lock(mm, pmd);
+   if (pmd_same(*pmd, orig_pmd))
+   __split_huge_swap_pmd(vma, address & HPAGE_PMD_MASK, pmd);
+   else
+   ret = -ENOENT;
+   spin_unlock(ptl);
+
+   return ret;
+}
+
+int do_huge_pmd_swap_page(struct vm_fault *vmf, pmd_t orig_pmd)
+{
+   struct page *page;
+   struct mem_cgroup *memcg;
+   struct vm_area_struct *vma = vmf->vma;
+   unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
+   swp_entry_t entry;
+   pmd_t pmd;
+   int i, locked, exclusive = 0, ret = 0;
+
+   entry = pmd_to_swp_entry(orig_pmd);
+   VM_BUG_ON(non_swap_entry(entry));
+   delayacct_set_flag(DELAYACCT_PF_SWAPIN);
+retry:
+   page = lookup_swap_cache(entry, NULL, vmf->address);
+   if (!page) {
+   page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE, vma,
+haddr, false);
+   if (!page) {
+   /*
+* Back out if somebody else faulted in this pmd
+* while we released the pmd lock.
+*/
+   if (likely(pmd_same(*vmf->pmd, orig_pmd))) {
+   /*
+* Failed to allocate huge page, split huge swap
+* cluster, and fallback to swapin normal page
+*/
+   ret = split_swap_cluster(entry, 0);
+   /* Somebody else swapin the swap entry, retry */
+   if (ret == -EEXIST) {
+   ret = 0;
+   goto retry;
+   /* swapoff occurs under us */
+   } else if (ret == -EINVAL)
+   ret = 0;
+   else
+   goto fallback;
+   }
+   delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+   goto out;
+   }
+
+   /* Had to read the page from swap area: Major fault */
+   ret = VM_FAULT_MAJOR;
+   count_vm_event(PGMAJFAULT);
+   count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
+   } else if (!PageTransCompound(page))
+   goto fallback;
+
+   locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags);
+
+   delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+   if (!locked) {
+   ret |= VM_FAULT_RETRY;
+   goto out_release;
+

Re: [PATCH v4 0/4] staging: vc04_services: Fix checkpatch.pl errors

2018-09-25 Thread Dan Carpenter
Looks fine, thanks.

regards,
dan carpenter



Re: [PATCH 3/5] lib: Add umoddi3 and udivmoddi4 of GCC library routines

2018-09-25 Thread Andreas Schwab
On Sep 25 2018, Zong Li  wrote:

> RV32 needs umoddi3 to do modulo when the operands are of long long
> type, like other library routines such as ucmpdi2, lshrdi3 and
> so on. I encountered an undefined reference to 'umoddi3' when using an
> in-house DMA driver; although it is an in-house driver, I think that
> umoddi3 is a common function for RV32.

You probably should use the macros from  instead.

Andreas.

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."


Re: [PATCH V2 04/27] mmc: mmci: introduce dma_priv pointer to mmci_host

2018-09-25 Thread Ludovic BARRE




On 09/24/2018 08:52 PM, Ulf Hansson wrote:

On 21 September 2018 at 11:45, Ludovic Barre  wrote:

From: Ludovic Barre 

This patch introduces a dma_priv pointer to define the specific
needs of each dma engine. This patch is needed to prepare for the
sdmmc variant with internal dma, which does not use the dmaengine API.

Signed-off-by: Ludovic Barre 
---
change v2:
-rename the specific dma engine structures to mmci_dmae_next/priv
-remove the dma prefix from the mmci_dmae_priv fields and rename the
"current" field to "cur"; this avoids a build issue with "current"
defined in include/asm-generic/current.h

  drivers/mmc/host/mmci.c | 155 ++--
  drivers/mmc/host/mmci.h |  18 +-
  2 files changed, 99 insertions(+), 74 deletions(-)

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 2f845f3..6de7c8d 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -415,31 +415,57 @@ static void mmci_init_sg(struct mmci_host *host, struct 
mmc_data *data)
   * no custom DMA interfaces are supported.
   */
  #ifdef CONFIG_DMA_ENGINE
+struct mmci_dmae_next {
+   struct dma_async_tx_descriptor *desc;
+   struct dma_chan *chan;
+   s32 cookie;
+};
+
+struct mmci_dmae_priv {
+   struct dma_chan *cur;
+   struct dma_chan *rx_channel;
+   struct dma_chan *tx_channel;
+   struct dma_async_tx_descriptor  *desc_current;
+   struct mmci_dmae_next next_data;
+   bool in_progress;


I am wondering whether it would make sense to keep the "bool
in_progress" in the struct mmci_host. I guess it will be used for all
dma variants anyway!?


Today it's only used in the dma_engine config, not by the sdmmc variant.
But it could be moved to the mmci_host structure, no problem.




+};
+
+#define mmci_dmae_inprogress(dmae) ((dmae)->in_progress)
+
  static int mmci_dma_setup(struct mmci_host *host)
  {
 const char *rxname, *txname;
+   struct mmci_dmae_priv *dmae;
+
+   dmae = devm_kzalloc(mmc_dev(host->mmc), sizeof(*dmae), GFP_KERNEL);
+   if (!dmae)
+   return -ENOMEM;

-   host->dma_rx_channel = dma_request_slave_channel(mmc_dev(host->mmc), 
"rx");
-   host->dma_tx_channel = dma_request_slave_channel(mmc_dev(host->mmc), 
"tx");
+   host->dma_priv = dmae;
+
+   dmae->rx_channel = dma_request_slave_channel(mmc_dev(host->mmc),
+"rx");
+   dmae->tx_channel = dma_request_slave_channel(mmc_dev(host->mmc),
+"tx");

 /* initialize pre request cookie */
-   host->next_data.cookie = 1;
+   dmae->next_data.cookie = 1;

 /*
  * If only an RX channel is specified, the driver will
  * attempt to use it bidirectionally, however if it is
  * is specified but cannot be located, DMA will be disabled.
  */
-   if (host->dma_rx_channel && !host->dma_tx_channel)
-   host->dma_tx_channel = host->dma_rx_channel;
+   if (dmae->rx_channel && !dmae->tx_channel)
+   dmae->tx_channel = dmae->rx_channel;

-   if (host->dma_rx_channel)
-   rxname = dma_chan_name(host->dma_rx_channel);
+   if (dmae->rx_channel)
+   rxname = dma_chan_name(dmae->rx_channel);
 else
 rxname = "none";

-   if (host->dma_tx_channel)
-   txname = dma_chan_name(host->dma_tx_channel);
+   if (dmae->tx_channel)
+   txname = dma_chan_name(dmae->tx_channel);
 else
 txname = "none";

@@ -450,15 +476,15 @@ static int mmci_dma_setup(struct mmci_host *host)
  * Limit the maximum segment size in any SG entry according to
  * the parameters of the DMA engine device.
  */
-   if (host->dma_tx_channel) {
-   struct device *dev = host->dma_tx_channel->device->dev;
+   if (dmae->tx_channel) {
+   struct device *dev = dmae->tx_channel->device->dev;
 unsigned int max_seg_size = dma_get_max_seg_size(dev);

 if (max_seg_size < host->mmc->max_seg_size)
 host->mmc->max_seg_size = max_seg_size;
 }
-   if (host->dma_rx_channel) {
-   struct device *dev = host->dma_rx_channel->device->dev;
+   if (dmae->rx_channel) {
+   struct device *dev = dmae->rx_channel->device->dev;
 unsigned int max_seg_size = dma_get_max_seg_size(dev);

 if (max_seg_size < host->mmc->max_seg_size)
@@ -477,21 +503,24 @@ static int mmci_dma_setup(struct mmci_host *host)
   */
  static inline void mmci_dma_release(struct mmci_host *host)
  {
-   if (host->dma_rx_channel)
-   dma_release_channel(host->dma_rx_channel);
-   if (host->dma_tx_channel)
-   dma_release_channel(host->dma_tx_channel);
-   host->dma_rx_channel = host->dma_tx_channel = NULL;
+   struct mmci_dmae_priv *dmae = host->dma_priv;
+
+   if (dmae->rx_

Re: [PATCH] mtd: spi-nor: cadence-quadspi: Use proper enum for dma_unmap_single

2018-09-25 Thread Tudor Ambarus
Hi, Nathan,

On 09/21/2018 01:29 PM, Nathan Chancellor wrote:
> Clang warns when one enumerated type is converted implicitly to another.
> 
> drivers/mtd/spi-nor/cadence-quadspi.c:962:47: warning: implicit
> conversion from enumeration type 'enum dma_transfer_direction' to
> different enumeration type 'enum dma_data_direction' [-Wenum-conversion]
> dma_dst = dma_map_single(nor->dev, buf, len, DMA_DEV_TO_MEM);
>   ~~~^~~
> ./include/linux/dma-mapping.h:428:66: note: expanded from macro
> 'dma_map_single'
> #define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, 0)
>  ^
> drivers/mtd/spi-nor/cadence-quadspi.c:997:43: warning: implicit
> conversion from enumeration type 'enum dma_transfer_direction' to
> different enumeration type 'enum dma_data_direction' [-Wenum-conversion]
> dma_unmap_single(nor->dev, dma_dst, len, DMA_DEV_TO_MEM);
> ~^~~
> ./include/linux/dma-mapping.h:429:70: note: expanded from macro
> 'dma_unmap_single'
> #define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, 0)
>  ~~  ^
> 2 warnings generated.
> 
> Use the proper enums from dma_data_direction to satisfy Clang.
> 
> DMA_TO_DEVICE = DMA_MEM_TO_DEV = 1
> DMA_FROM_DEVICE = DMA_DEV_TO_MEM = 2
> 
> Link: https://github.com/ClangBuiltLinux/linux/issues/108
> Signed-off-by: Nathan Chancellor 
> ---
>  drivers/mtd/spi-nor/cadence-quadspi.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c 
> b/drivers/mtd/spi-nor/cadence-quadspi.c
> index 8e714fbfa521..d0e57ac81098 100644
> --- a/drivers/mtd/spi-nor/cadence-quadspi.c
> +++ b/drivers/mtd/spi-nor/cadence-quadspi.c
> @@ -959,7 +959,7 @@ static int cqspi_direct_read_execute(struct spi_nor *nor, 
> u_char *buf,
>   return 0;
>   }
>  
> - dma_dst = dma_map_single(nor->dev, buf, len, DMA_DEV_TO_MEM);
> + dma_dst = dma_map_single(nor->dev, buf, len, DMA_FROM_DEVICE);
>   if (dma_mapping_error(nor->dev, dma_dst)) {
>   dev_err(nor->dev, "dma mapping failed\n");
>   return -ENOMEM;
> @@ -994,7 +994,7 @@ static int cqspi_direct_read_execute(struct spi_nor *nor, 
> u_char *buf,
>   }
>  
>  err_unmap:
> - dma_unmap_single(nor->dev, dma_dst, len, DMA_DEV_TO_MEM);
> + dma_unmap_single(nor->dev, dma_dst, len, DMA_TO_DEVICE);

Should have used DMA_FROM_DEVICE, as you did above. Otherwise looks good.

Cheers,
ta


Re: [PATCH V5 06/30] csky: Cache and TLB routines

2018-09-25 Thread Peter Zijlstra
On Mon, Sep 24, 2018 at 10:36:22PM +0800, Guo Ren wrote:
> diff --git a/arch/csky/abiv1/inc/abi/cacheflush.h 
> b/arch/csky/abiv1/inc/abi/cacheflush.h
> new file mode 100644
> index 000..f0de49c
> --- /dev/null
> +++ b/arch/csky/abiv1/inc/abi/cacheflush.h
> @@ -0,0 +1,43 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
> +
> +#ifndef __ABI_CSKY_CACHEFLUSH_H
> +#define __ABI_CSKY_CACHEFLUSH_H
> +
> +#include 
> +#include 
> +#include 
> +
> +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
> +extern void flush_dcache_page(struct page *);
> +
> +#define flush_cache_mm(mm)   cache_wbinv_all()
> +#define flush_cache_page(vma,page,pfn)   cache_wbinv_all()
> +#define flush_cache_dup_mm(mm)   cache_wbinv_all()
> +
> +#define flush_cache_range(mm,start,end)  cache_wbinv_range(start, end)
 ^^^ should be vma

> +#endif /* __ABI_CSKY_CACHEFLUSH_H */


> diff --git a/arch/csky/abiv1/inc/abi/tlb.h b/arch/csky/abiv1/inc/abi/tlb.h
> new file mode 100644
> index 000..6d461f3
> --- /dev/null
> +++ b/arch/csky/abiv1/inc/abi/tlb.h
> @@ -0,0 +1,12 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
> +
> +#ifndef __ABI_CSKY_TLB_H
> +#define __ABI_CSKY_TLB_H
> +
> +#define tlb_start_vma(tlb, vma) \
> + do { \
> + if (!tlb->fullmm) \
> + cache_wbinv_all(); \
> + }  while (0)
> +#endif /* __ABI_CSKY_TLB_H */

That should be:

if (!tlb->fullmm)
flush_cache_range(vma, vma->vm_start, vma->vm_end);

Because as per the whole abiv1 vs abiv2, you don't need write back
invalidation for v2 at all, also, you only need to invalidate the vma
range, no reason to shoot everything down.

Also, I'll be shortly removing this:

  https://lkml.kernel.org/r/20180913092812.071989...@infradead.org

> diff --git a/arch/csky/abiv2/inc/abi/cacheflush.h 
> b/arch/csky/abiv2/inc/abi/cacheflush.h
> new file mode 100644
> index 000..756beb7
> --- /dev/null
> +++ b/arch/csky/abiv2/inc/abi/cacheflush.h
> @@ -0,0 +1,40 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#ifndef __ABI_CSKY_CACHEFLUSH_H
> +#define __ABI_CSKY_CACHEFLUSH_H
> +
> +/* Keep includes the same across arches.  */
> +#include 
> +
> +/*
> + * The cache doesn't need to be flushed when TLB entries change when
> + * the cache is mapped to physical memory, not virtual memory
> + */
> +#define flush_cache_all()do { } while (0)
> +#define flush_cache_mm(mm)   do { } while (0)
> +#define flush_cache_dup_mm(mm)   do { } while (0)
> +#define flush_cache_range(vma, start, end)   do { } while (0)
 ^^^ like here..

> +#endif /* __ABI_CSKY_TLB_H */


RE: [PATCH 1/2] phy: zynqmp: Add phy driver for xilinx zynqmp phy core

2018-09-25 Thread Anurag Kumar Vulisha
HI Kishon,

Thanks a lot for spending your time in reviewing this patch. Please
find my comments inline

>-Original Message-
>From: Kishon Vijay Abraham I [mailto:kis...@ti.com]
>Sent: Tuesday, September 25, 2018 10:59 AM
>To: Anurag Kumar Vulisha ; Michal Simek
>; robh...@kernel.org; mark.rutl...@arm.com;
>vivek.gau...@codeaurora.org
>Cc: v.anuragku...@gmail.com; linux-kernel@vger.kernel.org; linux-arm-
>ker...@lists.infradead.org; devicet...@vger.kernel.org
>Subject: Re: [PATCH 1/2] phy: zynqmp: Add phy driver for xilinx zynqmp phy core
>
>Hi,
>
>On Wednesday 29 August 2018 07:37 PM, Anurag Kumar Vulisha wrote:
>> ZynqMP SoC has a Gigabit Transceiver with four lanes. All the high
>> speed peripherals such as USB, SATA, PCIE, Display Port and Ethernet
>> SGMII can rely on any of the four GT lanes for PHY layer. This patch
>> adds driver for that ZynqMP GT core.
>>
>> Signed-off-by: Anurag Kumar Vulisha 
>> ---
>>  drivers/phy/Kconfig|8 +
>>  drivers/phy/Makefile   |1 +
>>  drivers/phy/phy-zynqmp.c   | 1579
>
>>  include/dt-bindings/phy/phy.h  |2 +
>>  include/linux/phy/phy-zynqmp.h |   52 ++
>>  5 files changed, 1642 insertions(+)
>>  create mode 100644 drivers/phy/phy-zynqmp.c  create mode 100644
>> include/linux/phy/phy-zynqmp.h
>>
>> diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig index
>> 5c8d452..14cf3330 100644
>> --- a/drivers/phy/Kconfig
>> +++ b/drivers/phy/Kconfig
>> @@ -40,6 +40,14 @@ config PHY_XGENE
>>  help
>>This option enables support for APM X-Gene SoC multi-purpose PHY.
>>
>> +config PHY_XILINX_ZYNQMP
>> +tristate "Xilinx ZynqMP PHY driver"
>> +depends on ARCH_ZYNQMP
>> +select GENERIC_PHY
>> +help
>> +  Enable this to support ZynqMP High Speed Gigabit Transceiver
>> +  that is part of ZynqMP SoC.
>> +
>>  source "drivers/phy/allwinner/Kconfig"
>>  source "drivers/phy/amlogic/Kconfig"
>>  source "drivers/phy/broadcom/Kconfig"
>> diff --git a/drivers/phy/Makefile b/drivers/phy/Makefile index
>> 84e3bd9..f2a8d27 100644
>> --- a/drivers/phy/Makefile
>> +++ b/drivers/phy/Makefile
>> @@ -7,6 +7,7 @@ obj-$(CONFIG_GENERIC_PHY)+= phy-core.o
>>  obj-$(CONFIG_PHY_LPC18XX_USB_OTG)   += phy-lpc18xx-usb-otg.o
>>  obj-$(CONFIG_PHY_XGENE) += phy-xgene.o
>>  obj-$(CONFIG_PHY_PISTACHIO_USB) += phy-pistachio-usb.o
>> +obj-$(CONFIG_PHY_XILINX_ZYNQMP) += phy-zynqmp.o
>>  obj-$(CONFIG_ARCH_SUNXI)+= allwinner/
>>  obj-$(CONFIG_ARCH_MESON)+= amlogic/
>>  obj-$(CONFIG_LANTIQ)+= lantiq/
>> diff --git a/drivers/phy/phy-zynqmp.c b/drivers/phy/phy-zynqmp.c new
>> file mode 100644 index 000..306cedd
>> --- /dev/null
>> +++ b/drivers/phy/phy-zynqmp.c
>> @@ -0,0 +1,1579 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * phy-zynqmp.c - PHY driver for Xilinx ZynqMP GT.
>> + *
>> + * Copyright (C) 2018 Xilinx Inc.
>> + *
>> + * Author: Anurag Kumar Vulisha 
>> + *
>> + * This driver is tested for USB, SATA and Display Port currently.
>> + * Other controllers PCIe and SGMII should also work but that is
>> + * experimental as of now.
>> + */
>> +
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +/* Total number of controllers */
>> +#define CONTROLLERS_PER_LANE5
>> +
>> +/* USB pipe control parameters */
>> +#define PIPE_CLK_OFFSET 0x7c
>> +#define PIPE_POWER_OFFSET   0x80
>> +#define PIPE_CLK_ON 1
>> +#define PIPE_CLK_OFF0
>> +#define PIPE_POWER_ON   1
>> +#define PIPE_POWER_OFF  0
>> +
>> +
>> +/**
>> + * struct xpsgtr_ssc - structure to hold SSC settings for a lane
>> + * @refclk_rate: PLL reference clock frequency
>> + * @pll_ref_clk: value to be written to register for corresponding
>> +ref clk rate
>> + * @steps: number of steps of SSC (Spread Spectrum Clock)
>> + * @step_size: step size of each step  */ struct xpsgtr_ssc {
>> +u32 refclk_rate;
>> +u8  pll_ref_clk;
>> +u32 steps;
>> +u32 step_size;
>> +};
>> +
>> +/* lookup table to hold all settings needed for a ref clock frequency
>> +*/ static struct xpsgtr_ssc ssc_lookup[MAX_REFCLK] = {
>> +{1920, 0x05, 608, 264020},
>> +{2000, 0x06, 634, 243454},
>> +{2400, 0x07, 760, 168973},
>> +{2600, 0x08, 824, 143860},
>> +{2700, 0x09, 856, 86551},
>> +{3840, 0x0A, 1218, 65896},
>> +{4000, 0x0B, 634, 243454},
>> +{5200, 0x0C, 824, 143860},
>> +{1, 0x0D, 1058, 87533},
>> +{10800, 0x0E, 856, 86551},
>> +{12500, 0x0F, 992, 119497},
>> +{13500, 0x10, 1070, 55393},
>> +{15000, 0x11, 792, 187091}
>> +};
>> +
>> +/**
>> + * st

Re: [PATCH v2 2/2] treewide: use bus_find_device_by_fwnode

2018-09-25 Thread Silesh C V
Hello Greg,

On Tue, Sep 25, 2018 at 12:34 PM Greg Kroah-Hartman
 wrote:
>
> On Tue, Sep 25, 2018 at 10:58:48AM +0530, Silesh C V wrote:
> > Use bus_find_device_by_fwnode helper to find the device having a
> > specific firmware node on a bus.
> > ---
>
>
> No signed off by?
>

Oops. Sorry about that. Will send it again with the Signed-off-by added.

Thanks,
Silesh


Re: [LKP] [mm, oom] 6209f6fc62: general_protection_fault:#[##]

2018-09-25 Thread Michal Hocko
On Tue 25-09-18 15:00:15, Rong Chen wrote:
> 
> 
> On 09/25/2018 02:06 PM, Michal Hocko wrote:
> > On Tue 25-09-18 13:48:20, kernel test robot wrote:
> > > FYI, we noticed the following commit (built with gcc-7):
> > > 
> > > commit: 6209f6fc62835d84c2a92d237588a114e39436ce ("mm, oom: rework 
> > > mmap_exit vs. oom_reaper synchronization")
> > > https://github.com/0day-ci/linux 
> > > UPDATE-20180911-024633/Tetsuo-Handa/mm-oom-Fix-unnecessary-killing-of-additional-processes/20180910-163452
> > Do you have a msg-id to the patch that has been tested?
> 
> message_id: 20180910125513.311-2-mho...@kernel.org

Thanks! It would be really great if this was a part of the report when
testing patches which are not in mainline yet.

This patch resulting in a crash is quite surprising. The patch is RFC
and not tested yet but I will definitely have a look. Could you help me
some more and give faddr2line __oom_reap_task_mm+0x40 please?
-- 
Michal Hocko
SUSE Labs


Re: [PATCH 1/8] dt-bindings: rcar-gen3-phy-usb2: add no-otg-pins property

2018-09-25 Thread Simon Horman
On Fri, Sep 21, 2018 at 08:53:17PM +0900, Yoshihiro Shimoda wrote:
> This patch adds a new optional property "renesas,no-otg-pins" for
> boards which do not provide proper otg pins.
> 
> Signed-off-by: Yoshihiro Shimoda 

Reviewed-by: Simon Horman 



Re: [PATCH v10 24/26] KVM: s390: device attrs to enable/disable AP interpretation

2018-09-25 Thread David Hildenbrand
On 24/09/2018 20:42, Tony Krowiak wrote:
> On 09/24/2018 12:25 PM, Tony Krowiak wrote:
>> On 09/24/2018 07:23 AM, David Hildenbrand wrote:
> 
> (...)
> 
>>> Will you also fixup this patch to expose KVM_S390_VM_CRYPTO_ENABLE_APIE
>>> only if supported by HW? (ap_instructions_available)
>>
>> Given that this patch DOES expose KVM_S390_VM_CRYPTO_ENABLE_APIE only if 
>> supported by HW, I assume you are talking about
>> KVM_S390_VM_CRYPTO_DISABLE_APIE. I didn't check 
>> ap_instructions_available() for disabling APIE because I didn't
>> think it necessary given that ECA.28 will be set to 0 (intercept) by 
>> default, whether AP instructions are installed or not; so why not allow 
>> disabling apie. I suppose from the perspective of consistency, since the 
>> kvm_s390_vm_has_attr() function checks ap_instructions_available() for 
>> both attributes, then it probably makes sense to add that check to 
>> KVM_S390_VM_CRYPTO_DISABLE_APIE here. Then again, we could make a change 
>> in ap_instructions_available() to allow KVM_S390_VM_CRYPTO_DISABLE_APIE 
>> regardless of whether AP instructions are available. It boils down to 
>> whether APIE needs to be dynamically disabled at some point when it has 
>> been enabled. The only case I can think of where that may be necessary 
>> is if a guest is migrated to a system without AP instructions. I don't 
>> think that can happen and may even be protected against precisely 
>> because the VM attributes won't be available on the target system due to 
>> no AP instructions. What say you?
>>
> David,
> 
> I'm sorry, I misinterpreted what you were asking for. Check out the 
> fixup! patch below and let me know if that is what you are looking for.
> If so, I will integrate that change and post v11 tomorrow (Tuesday 9/24).
> 
> ---8<---
> 
> From: Tony Krowiak 
> Date: Mon, 24 Sep 2018 14:18:37 -0400
> Subject: [FIXUP v10] fixup! KVM: s390: device attrs to enable/disable AP
>   interpretation
> 
> ---
>   arch/s390/kvm/kvm-s390.c | 9 -
>   1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index 6654bb1fc26a..a528558baa78 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -901,6 +901,10 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, 
> struct kvm_device_attr *attr)
>   kvm->arch.crypto.apie = 1;
>   break;
>   case KVM_S390_VM_CRYPTO_DISABLE_APIE:
> + if (!ap_instructions_available()) {
> + mutex_unlock(&kvm->lock);
> + return -EOPNOTSUPP;
> + }
>   kvm->arch.crypto.apie = 0;
>   break;
>   default:
> @@ -1509,9 +1513,11 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, 
> struct kvm_device_attr *attr)
>   case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
>   case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
>   case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
> + ret = 0;
> + break;
>   case KVM_S390_VM_CRYPTO_ENABLE_APIE:
>   case KVM_S390_VM_CRYPTO_DISABLE_APIE:
> - ret = 0;
> + ret = ap_instructions_available();

Just a little remark, I guess we want to report 0 if available and
-ENXIO if not.

-- 

Thanks,

David / dhildenb


[PATCH v2] mtd: spi-nor: cadence-quadspi: Use proper enum for dma_unmap_single

2018-09-25 Thread Nathan Chancellor
Clang warns when one enumerated type is converted implicitly to another.

drivers/mtd/spi-nor/cadence-quadspi.c:962:47: warning: implicit
conversion from enumeration type 'enum dma_transfer_direction' to
different enumeration type 'enum dma_data_direction' [-Wenum-conversion]
dma_dst = dma_map_single(nor->dev, buf, len, DMA_DEV_TO_MEM);
  ~~~^~~
./include/linux/dma-mapping.h:428:66: note: expanded from macro
'dma_map_single'
#define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, 0)
     ^
drivers/mtd/spi-nor/cadence-quadspi.c:997:43: warning: implicit
conversion from enumeration type 'enum dma_transfer_direction' to
different enumeration type 'enum dma_data_direction' [-Wenum-conversion]
dma_unmap_single(nor->dev, dma_dst, len, DMA_DEV_TO_MEM);
~^~~
./include/linux/dma-mapping.h:429:70: note: expanded from macro
'dma_unmap_single'
#define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, 0)
 ~~  ^
2 warnings generated.

Use the proper enums from dma_data_direction to satisfy Clang.

DMA_FROM_DEVICE = DMA_DEV_TO_MEM = 2

Link: https://github.com/ClangBuiltLinux/linux/issues/108
Signed-off-by: Nathan Chancellor 
---

v1 -> v2:

* Fix second half of patch (previously incorrectly used DMA_TO_DEVICE),
  thanks to Tudor Ambarus for catching this.

 drivers/mtd/spi-nor/cadence-quadspi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c 
b/drivers/mtd/spi-nor/cadence-quadspi.c
index 8e714fbfa521..e24db817154e 100644
--- a/drivers/mtd/spi-nor/cadence-quadspi.c
+++ b/drivers/mtd/spi-nor/cadence-quadspi.c
@@ -959,7 +959,7 @@ static int cqspi_direct_read_execute(struct spi_nor *nor, 
u_char *buf,
return 0;
}
 
-   dma_dst = dma_map_single(nor->dev, buf, len, DMA_DEV_TO_MEM);
+   dma_dst = dma_map_single(nor->dev, buf, len, DMA_FROM_DEVICE);
if (dma_mapping_error(nor->dev, dma_dst)) {
dev_err(nor->dev, "dma mapping failed\n");
return -ENOMEM;
@@ -994,7 +994,7 @@ static int cqspi_direct_read_execute(struct spi_nor *nor, 
u_char *buf,
}
 
 err_unmap:
-   dma_unmap_single(nor->dev, dma_dst, len, DMA_DEV_TO_MEM);
+   dma_unmap_single(nor->dev, dma_dst, len, DMA_FROM_DEVICE);
 
return 0;
 }
-- 
2.19.0



Re: [PATCH] mtd: spi-nor: cadence-quadspi: Use proper enum for dma_unmap_single

2018-09-25 Thread Nathan Chancellor
On Tue, Sep 25, 2018 at 10:24:04AM +0300, Tudor Ambarus wrote:
> Hi, Nathan,
> 
> On 09/21/2018 01:29 PM, Nathan Chancellor wrote:
> > Clang warns when one enumerated type is converted implicitly to another.
> > 
> > drivers/mtd/spi-nor/cadence-quadspi.c:962:47: warning: implicit
> > conversion from enumeration type 'enum dma_transfer_direction' to
> > different enumeration type 'enum dma_data_direction' [-Wenum-conversion]
> > dma_dst = dma_map_single(nor->dev, buf, len, DMA_DEV_TO_MEM);
> >   ~~~^~~
> > ./include/linux/dma-mapping.h:428:66: note: expanded from macro
> > 'dma_map_single'
> > #define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, 0)
> >  ^
> > drivers/mtd/spi-nor/cadence-quadspi.c:997:43: warning: implicit
> > conversion from enumeration type 'enum dma_transfer_direction' to
> > different enumeration type 'enum dma_data_direction' [-Wenum-conversion]
> > dma_unmap_single(nor->dev, dma_dst, len, DMA_DEV_TO_MEM);
> > ~^~~
> > ./include/linux/dma-mapping.h:429:70: note: expanded from macro
> > 'dma_unmap_single'
> > #define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, 0)
> >  ~~  ^
> > 2 warnings generated.
> > 
> > Use the proper enums from dma_data_direction to satisfy Clang.
> > 
> > DMA_TO_DEVICE = DMA_MEM_TO_DEV = 1
> > DMA_FROM_DEVICE = DMA_DEV_TO_MEM = 2
> > 
> > Link: https://github.com/ClangBuiltLinux/linux/issues/108
> > Signed-off-by: Nathan Chancellor 
> > ---
> >  drivers/mtd/spi-nor/cadence-quadspi.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c 
> > b/drivers/mtd/spi-nor/cadence-quadspi.c
> > index 8e714fbfa521..d0e57ac81098 100644
> > --- a/drivers/mtd/spi-nor/cadence-quadspi.c
> > +++ b/drivers/mtd/spi-nor/cadence-quadspi.c
> > @@ -959,7 +959,7 @@ static int cqspi_direct_read_execute(struct spi_nor 
> > *nor, u_char *buf,
> > return 0;
> > }
> >  
> > -   dma_dst = dma_map_single(nor->dev, buf, len, DMA_DEV_TO_MEM);
> > +   dma_dst = dma_map_single(nor->dev, buf, len, DMA_FROM_DEVICE);
> > if (dma_mapping_error(nor->dev, dma_dst)) {
> > dev_err(nor->dev, "dma mapping failed\n");
> > return -ENOMEM;
> > @@ -994,7 +994,7 @@ static int cqspi_direct_read_execute(struct spi_nor 
> > *nor, u_char *buf,
> > }
> >  
> >  err_unmap:
> > -   dma_unmap_single(nor->dev, dma_dst, len, DMA_DEV_TO_MEM);
> > +   dma_unmap_single(nor->dev, dma_dst, len, DMA_TO_DEVICE);
> 
> Should have used DMA_FROM_DEVICE, as you did above. Otherwise looks good.
> 
> Cheers,
> ta

Thank you very much for catching this, did several of these conversions
back to back and didn't look closely at this. I just sent a v2, I
appreciate the review!

Nathan


Re: [PATCH] gpiolib: Fix gpio_direction_* for single direction GPIOs

2018-09-25 Thread Linus Walleij
Hi Ricardo,

thanks for the patch and sorry for taking time before responding.

On Fri, Sep 21, 2018 at 12:36 PM Ricardo Ribalda Delgado
 wrote:
> GPIOs with no programmable direction are not required to implement
> direction_output nor direction_input.
>
> If we try to set an output direction on an output-only GPIO or input
> direction on an input-only GPIO simply return 0.
>
> This allows this single direction GPIO to be used by libgpiod.
>
> Signed-off-by: Ricardo Ribalda Delgado 

It makes perfect sense, patch applied.

I'll go in and add some comments to the code so I understand it
right as well in the future.

Yours,
Linus Walleij


Re: WARNING: kmalloc bug in input_mt_init_slots

2018-09-25 Thread Dmitry Vyukov
On Mon, Sep 24, 2018 at 8:41 PM, Dmitry Torokhov
 wrote:
> On Mon, Sep 24, 2018 at 03:55:04PM +, Christopher Lameter wrote:
>> On Mon, 24 Sep 2018, Dmitry Vyukov wrote:
>>
>> > On Mon, Sep 24, 2018 at 5:08 PM, Christopher Lameter  
>> > wrote:
>> > > On Sun, 23 Sep 2018, Dmitry Vyukov wrote:
>> > >
>> > >> What was the motivation behind that WARNING about large allocations in
>> > >> kmalloc? Why do we want to know about them? Is the general policy that
>> > >> kmalloc calls with potentially large size requests need to use NOWARN?
>> > >> Is this WARNING still considered useful? Or should we change it to
>> > >> pr_err?
>> > >
>> > > In general large allocs should be satisfied by the page allocator. The
>> > > slab allocators are used for allocating and managing small objects. The
>> > > page allocator has mechanisms to deal with large objects (compound pages,
>> > > multiple page sized allocs etc).
>> >
>> > I am asking more about the status of this warning. If it fires in
>> > input_mt_init_slots(), does it mean that input_mt_init_slots() needs
>> > to be fixed? If not, then we need to change this warning to something
>> > else.
>>
>> Hmmm.. kmalloc falls back to the page allocator already?
>>
>> See
>>
>> static __always_inline void *kmalloc(size_t size, gfp_t flags)
>> {
>> if (__builtin_constant_p(size)) {
>
> It would not be a constant here though.
>
>> if (size > KMALLOC_MAX_CACHE_SIZE)
>> return kmalloc_large(size, flags);
>>
>>
>> Note that this uses KMALLOC_MAX_CACHE_SIZE which should be smaller than
>> KMALLOC_MAX_SIZE.
>>
>>
>> How large is the allocation? AFAICT requests larger than KMALLOC_MAX_SIZE
>> are larger than the maximum allowed by the page allocator. Thus the warning
>> and the NULL return.
>
> The size in this particular case is being derived from a value passed
> from userspace. Input core does not care about any limits on size of
> memory kmalloc() can support and is perfectly happy with getting NULL
> and telling userspace to go away with their silly requests by returning
> -ENOMEM.
>
> For the record: I definitely do not want to pre-sanitize size either in
> uinput or in input core.

Christopher,

Assuming that the size is large enough to fail in all allocators, is
this warning still useful? How? Should we remove it?


[PATCH v2 2/2] treewide: use bus_find_device_by_fwnode

2018-09-25 Thread Silesh C V
Use bus_find_device_by_fwnode helper to find the device having a
specific firmware node on a bus.

Signed-off-by: Silesh C V 
---
 drivers/hwtracing/coresight/of_coresight.c | 14 --
 drivers/i2c/i2c-core-of.c  |  9 ++---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c |  8 +---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c |  8 +---
 drivers/nvmem/core.c   |  7 +--
 drivers/of/of_mdio.c   |  8 +---
 drivers/of/platform.c  |  7 +--
 drivers/spi/spi.c  | 10 +++---
 8 files changed, 14 insertions(+), 57 deletions(-)

diff --git a/drivers/hwtracing/coresight/of_coresight.c 
b/drivers/hwtracing/coresight/of_coresight.c
index 6880bee..8193b50 100644
--- a/drivers/hwtracing/coresight/of_coresight.c
+++ b/drivers/hwtracing/coresight/of_coresight.c
@@ -17,12 +17,6 @@
 #include 
 #include 
 
-
-static int of_dev_node_match(struct device *dev, void *data)
-{
-   return dev->of_node == data;
-}
-
 static struct device *
 of_coresight_get_endpoint_device(struct device_node *endpoint)
 {
@@ -32,8 +26,8 @@ static int of_dev_node_match(struct device *dev, void *data)
 * If we have a non-configurable replicator, it will be found on the
 * platform bus.
 */
-   dev = bus_find_device(&platform_bus_type, NULL,
- endpoint, of_dev_node_match);
+   dev = bus_find_device_by_fwnode(&platform_bus_type, NULL,
+   &endpoint->fwnode);
if (dev)
return dev;
 
@@ -41,8 +35,8 @@ static int of_dev_node_match(struct device *dev, void *data)
 * We have a configurable component - circle through the AMBA bus
 * looking for the device that matches the endpoint node.
 */
-   return bus_find_device(&amba_bustype, NULL,
-  endpoint, of_dev_node_match);
+   return bus_find_device_by_fwnode(&amba_bustype, NULL,
+   &endpoint->fwnode);
 }
 
 static void of_coresight_get_ports(const struct device_node *node,
diff --git a/drivers/i2c/i2c-core-of.c b/drivers/i2c/i2c-core-of.c
index 6cb7ad6..2b8ef8d 100644
--- a/drivers/i2c/i2c-core-of.c
+++ b/drivers/i2c/i2c-core-of.c
@@ -116,18 +116,13 @@ void of_i2c_register_devices(struct i2c_adapter *adap)
of_node_put(bus);
 }
 
-static int of_dev_node_match(struct device *dev, void *data)
-{
-   return dev->of_node == data;
-}
-
 /* must call put_device() when done with returned i2c_client device */
 struct i2c_client *of_find_i2c_device_by_node(struct device_node *node)
 {
struct device *dev;
struct i2c_client *client;
 
-   dev = bus_find_device(&i2c_bus_type, NULL, node, of_dev_node_match);
+   dev = bus_find_device_by_fwnode(&i2c_bus_type, NULL, &node->fwnode);
if (!dev)
return NULL;
 
@@ -145,7 +140,7 @@ struct i2c_adapter *of_find_i2c_adapter_by_node(struct 
device_node *node)
struct device *dev;
struct i2c_adapter *adapter;
 
-   dev = bus_find_device(&i2c_bus_type, NULL, node, of_dev_node_match);
+   dev = bus_find_device_by_fwnode(&i2c_bus_type, NULL, &node->fwnode);
if (!dev)
return NULL;
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c 
b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 081aa91..b0d418e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -4832,19 +4832,13 @@ static void hns_roce_v1_cleanup_eq_table(struct 
hns_roce_dev *hr_dev)
 };
 MODULE_DEVICE_TABLE(acpi, hns_roce_acpi_match);
 
-static int hns_roce_node_match(struct device *dev, void *fwnode)
-{
-   return dev->fwnode == fwnode;
-}
-
 static struct
 platform_device *hns_roce_find_pdev(struct fwnode_handle *fwnode)
 {
struct device *dev;
 
/* get the 'device' corresponding to the matching 'fwnode' */
-   dev = bus_find_device(&platform_bus_type, NULL,
- fwnode, hns_roce_node_match);
+   dev = bus_find_device_by_fwnode(&platform_bus_type, NULL, fwnode);
/* get the platform device */
return dev ? to_platform_device(dev) : NULL;
 }
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index 16294cd..d5d7c88 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -758,17 +758,11 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device 
*dsaf_dev)
return (void *)misc_op;
 }
 
-static int hns_dsaf_dev_match(struct device *dev, void *fwnode)
-{
-   return dev->fwnode == fwnode;
-}
-
 struct
 platform_device *hns_dsaf_find_platform_device(struct fwnode_handle *fwnode)
 {
struct device *dev;
 
-   dev = bus_find_device(&platform_bus_type,

Re: [LKP] [mm, oom] 6209f6fc62: general_protection_fault:#[##]

2018-09-25 Thread Rong Chen




On 09/25/2018 03:31 PM, Michal Hocko wrote:

On Tue 25-09-18 15:00:15, Rong Chen wrote:


On 09/25/2018 02:06 PM, Michal Hocko wrote:

On Tue 25-09-18 13:48:20, kernel test robot wrote:

FYI, we noticed the following commit (built with gcc-7):

commit: 6209f6fc62835d84c2a92d237588a114e39436ce ("mm, oom: rework mmap_exit vs. 
oom_reaper synchronization")
https://github.com/0day-ci/linux 
UPDATE-20180911-024633/Tetsuo-Handa/mm-oom-Fix-unnecessary-killing-of-additional-processes/20180910-163452

Do you have a msg-id to the patch that has been tested?

message_id: 20180910125513.311-2-mho...@kernel.org

Thanks! It would be really great if this was a part of the report when
testing patches which are not mainline yet.

This patch resulting in a crash is quite surprising. The patch is RFC
and not tested yet but I will definitely have a look. Could you help me
some more and give faddr2line __oom_reap_task_mm+0x40 please?

$ faddr2line ./vmlinux __oom_reap_task_mm+0x40
__oom_reap_task_mm+0x40/0x175:
can_madv_dontneed_vma at mm/internal.h:48
 (inlined by) __oom_reap_task_mm at mm/oom_kill.c:505

Best Regards,
Rong Chen


Re: rtc: isl1208: avoid possible sysfs race

2018-09-25 Thread Alexandre Belloni
On 25/09/2018 06:46:24+, Denis OSTERLAND wrote:
> Hi,
> 
> just for clarification:
> This changes user-space representation from
> /sys/class/rtc/rtcN/device/{atrim,dtrim,usr}
> to
> /sys/class/rtc/rtcN/{atrim,dtrim,usr}
> and fixes the "mistake made back in 2006" you mention in
> https://patchwork.ozlabs.org/patch/881397/
> correct?
> 

This is correct. At some point, it would also be great to move from
atrim/dtrim to offset

> Regards Denis
> 
> Am Samstag, den 15.09.2018, 13:43 +0200 schrieb Alexandre Belloni:
> > Use rtc_add_group to add the common sysfs group to avoid a possible race
> > condition.
> > 
> > Signed-off-by: Alexandre Belloni 
> > ---
> >  drivers/rtc/rtc-isl1208.c | 11 +--
> >  1 file changed, 1 insertion(+), 10 deletions(-)
> > 
> > diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c
> > index ea18a8f4bce0..e1e9434c9972 100644
> > --- a/drivers/rtc/rtc-isl1208.c
> > +++ b/drivers/rtc/rtc-isl1208.c
> > @@ -804,7 +804,7 @@ isl1208_probe(struct i2c_client *client, const struct 
> > i2c_device_id *id)
> >     evdet_irq = of_irq_get_byname(np, "evdet");
> >     }
> >  
> > -   rc = sysfs_create_group(&client->dev.kobj, &isl1208_rtc_sysfs_files);
> > +   rc = rtc_add_group(rtc, &isl1208_rtc_sysfs_files);
> >     if (rc)
> >     return rc;
> >  
> > @@ -821,14 +821,6 @@ isl1208_probe(struct i2c_client *client, const struct 
> > i2c_device_id *id)
> >     return rtc_register_device(rtc);
> >  }
> >  
> > -static int
> > -isl1208_remove(struct i2c_client *client)
> > -{
> > -   sysfs_remove_group(&client->dev.kobj, &isl1208_rtc_sysfs_files);
> > -
> > -   return 0;
> > -}
> > -
> >  static const struct i2c_device_id isl1208_id[] = {
> >     { "isl1208", TYPE_ISL1208 },
> >     { "isl1218", TYPE_ISL1218 },
> > @@ -851,7 +843,6 @@ static struct i2c_driver isl1208_driver = {
> >     .of_match_table = of_match_ptr(isl1208_of_match),
> >     },
> >     .probe = isl1208_probe,
> > -   .remove = isl1208_remove,
> >     .id_table = isl1208_id,
> >  };
> >  
> 
> Diehl Connectivity Solutions GmbH
> Geschäftsführung: Horst Leonberger
> Sitz der Gesellschaft: Nürnberg - Registergericht: Amtsgericht
> Nürnberg: HRB 32315
> ___
> 
> Der Inhalt der vorstehenden E-Mail ist nicht rechtlich bindend. Diese E-Mail 
> enthaelt vertrauliche und/oder rechtlich geschuetzte Informationen.
> Informieren Sie uns bitte, wenn Sie diese E-Mail faelschlicherweise erhalten 
> haben. Bitte loeschen Sie in diesem Fall die Nachricht.
> Jede unerlaubte Form der Reproduktion, Bekanntgabe, Aenderung, Verteilung 
> und/oder Publikation dieser E-Mail ist strengstens untersagt.
> The contents of the above mentioned e-mail is not legally binding. This 
> e-mail contains confidential and/or legally protected information. Please 
> inform us if you have received this e-mail by
> mistake and delete it in such a case. Each unauthorized reproduction, 
> disclosure, alteration, distribution and/or publication of this e-mail is 
> strictly prohibited. 

-- 
Alexandre Belloni, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com


Re: [LKP] [mm, oom] 6209f6fc62: general_protection_fault:#[##]

2018-09-25 Thread Michal Hocko
On Tue 25-09-18 15:43:22, Rong Chen wrote:
> 
> 
> On 09/25/2018 03:31 PM, Michal Hocko wrote:
> > On Tue 25-09-18 15:00:15, Rong Chen wrote:
> > > 
> > > On 09/25/2018 02:06 PM, Michal Hocko wrote:
> > > > On Tue 25-09-18 13:48:20, kernel test robot wrote:
> > > > > FYI, we noticed the following commit (built with gcc-7):
> > > > > 
> > > > > commit: 6209f6fc62835d84c2a92d237588a114e39436ce ("mm, oom: rework 
> > > > > mmap_exit vs. oom_reaper synchronization")
> > > > > https://github.com/0day-ci/linux 
> > > > > UPDATE-20180911-024633/Tetsuo-Handa/mm-oom-Fix-unnecessary-killing-of-additional-processes/20180910-163452
> > > > Do you have a msg-id to the patch that has been tested?
> > > message_id: 20180910125513.311-2-mho...@kernel.org
> > Thanks! It would be really great if this was a part of the report when
> > testing patches which are not mainline yet.
> > 
> > This patch resulting in a crash is quite surprising. The patch is RFC
> > and not tested yet but I will definitely have a look. Could you help me
> > some more and give faddr2line __oom_reap_task_mm+0x40 please?
> $ faddr2line ./vmlinux __oom_reap_task_mm+0x40
> __oom_reap_task_mm+0x40/0x175:
> can_madv_dontneed_vma at mm/internal.h:48
>  (inlined by) __oom_reap_task_mm at mm/oom_kill.c:505

OK, this suggests the vma is gone. The problem is that I have removed
MMF_OOM_SKIP from the exit path and mm->mmap still points to the freed
memory. A later patch in the series does the latter so the resulting code
is safe but not bisect safe. I will return MMF_OOM_SKIP setting to the
exit_mmap path and that should cure the issue.

diff --git a/mm/mmap.c b/mm/mmap.c
index 3481424717ac..a02b314c0546 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3083,8 +3083,10 @@ void exit_mmap(struct mm_struct *mm)
unmap_vmas(&tlb, vma, 0, -1);
 
/* oom_reaper cannot race with the page tables teardown */
-   if (oom)
+   if (oom) {
down_write(&mm->mmap_sem);
+   set_bit(MMF_OOM_SKIP, &mm->flags);
+   }
 
free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
tlb_finish_mmu(&tlb, 0, -1);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f10aa5360616..b7137ca9c7a2 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -554,7 +554,7 @@ static bool oom_reap_task_mm(struct task_struct *tsk, 
struct mm_struct *mm)
 * MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't
 * work on the mm anymore. The check for MMF_OOM_SKIP must run
 * under mmap_sem for reading because it serializes against the
-* down_write();up_write() cycle in exit_mmap().
+* down_write() in exit_mmap().
 */
if (test_bit(MMF_OOM_SKIP, &mm->flags)) {
trace_skip_task_reaping(tsk->pid);
-- 
Michal Hocko
SUSE Labs


[RFC PATCH] soc: qcom: rmtfs_mem: Control remoteproc from rmtfs_mem

2018-09-25 Thread Bjorn Andersson
rmtfs_mem provides access to physical storage and is crucial for the
operation of the Qualcomm modem subsystem.

The rmtfs_mem implementation must be available before the modem
subsystem is booted and a solution where the modem remoteproc will
verify that the rmtfs_mem is available has been discussed in the past.
But this would not handle the case where the rmtfs_mem provider is
restarted, which would cause fatal loss of access to the storage device
for the modem.

The suggestion is therefore to link the rmtfs_mem to its associated
remote processor instance and control it based on the availability of
the rmtfs_mem implementation.

Signed-off-by: Bjorn Andersson 
---

The currently implemented workaround in the Linaro QCOMLT releases is to
blacklist the qcom_q6v5_pil kernel module and load this explicitly after rmtfs
has been started.

With this patch the modem module can be loaded automatically by the
platform_bus and will only be booted as the rmtfs becomes available. Performing
actions such as upgrading (and restarting) the rmtfs service will cause the
modem to automatically restart and hence continue to function after the
upgrade.

 .../reserved-memory/qcom,rmtfs-mem.txt|  7 ++
 drivers/remoteproc/qcom_q6v5_pil.c|  1 +
 drivers/soc/qcom/Kconfig  |  1 +
 drivers/soc/qcom/rmtfs_mem.c  | 23 ++-
 4 files changed, 31 insertions(+), 1 deletion(-)

diff --git 
a/Documentation/devicetree/bindings/reserved-memory/qcom,rmtfs-mem.txt 
b/Documentation/devicetree/bindings/reserved-memory/qcom,rmtfs-mem.txt
index 8562ba1dce69..95b209e7f5d1 100644
--- a/Documentation/devicetree/bindings/reserved-memory/qcom,rmtfs-mem.txt
+++ b/Documentation/devicetree/bindings/reserved-memory/qcom,rmtfs-mem.txt
@@ -32,6 +32,13 @@ access block device data using the Remote Filesystem 
protocol.
Value type: 
Definition: vmid of the remote processor, to set up memory protection.
 
+- rproc:
+   Usage: optional
+   Value type: 
+   Definition: reference to a remoteproc node, that should be powered up
+   while the remote file system memory instance is ready to
+   handle requests from the remote subsystem.
+
 = EXAMPLE
 The following example shows the remote filesystem memory setup for APQ8016,
 with the rmtfs region for the Hexagon DSP (id #1) located at 0x8670.
diff --git a/drivers/remoteproc/qcom_q6v5_pil.c 
b/drivers/remoteproc/qcom_q6v5_pil.c
index d7a4b9eca5d2..1445a38e8b34 100644
--- a/drivers/remoteproc/qcom_q6v5_pil.c
+++ b/drivers/remoteproc/qcom_q6v5_pil.c
@@ -1142,6 +1142,7 @@ static int q6v5_probe(struct platform_device *pdev)
qproc = (struct q6v5 *)rproc->priv;
qproc->dev = &pdev->dev;
qproc->rproc = rproc;
+   rproc->auto_boot = false;
platform_set_drvdata(pdev, qproc);
 
ret = q6v5_init_mem(qproc, pdev);
diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 01fb6aba61d2..1109272479b9 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -88,6 +88,7 @@ config QCOM_QMI_HELPERS
 config QCOM_RMTFS_MEM
tristate "Qualcomm Remote Filesystem memory driver"
depends on ARCH_QCOM
+   depends on REMOTEPROC
select QCOM_SCM
help
  The Qualcomm remote filesystem memory driver is used for allocating
diff --git a/drivers/soc/qcom/rmtfs_mem.c b/drivers/soc/qcom/rmtfs_mem.c
index 8a3678c2e83c..8b08be310397 100644
--- a/drivers/soc/qcom/rmtfs_mem.c
+++ b/drivers/soc/qcom/rmtfs_mem.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -39,6 +40,8 @@ struct qcom_rmtfs_mem {
unsigned int client_id;
 
unsigned int perms;
+
+   struct rproc *rproc;
 };
 
 static ssize_t qcom_rmtfs_mem_show(struct device *dev,
@@ -80,11 +83,18 @@ static int qcom_rmtfs_mem_open(struct inode *inode, struct 
file *filp)
struct qcom_rmtfs_mem *rmtfs_mem = container_of(inode->i_cdev,
struct qcom_rmtfs_mem,
cdev);
+   int ret = 0;
 
get_device(&rmtfs_mem->dev);
filp->private_data = rmtfs_mem;
 
-   return 0;
+   if (rmtfs_mem->rproc) {
+   ret = rproc_boot(rmtfs_mem->rproc);
+   if (ret)
+   put_device(&rmtfs_mem->dev);
+   }
+
+   return ret;
 }
 static ssize_t qcom_rmtfs_mem_read(struct file *filp,
  char __user *buf, size_t count, loff_t *f_pos)
@@ -127,6 +137,9 @@ static int qcom_rmtfs_mem_release(struct inode *inode, 
struct file *filp)
 {
struct qcom_rmtfs_mem *rmtfs_mem = filp->private_data;
 
+   if (rmtfs_mem->rproc)
+   rproc_shutdown(rmtfs_mem->rproc);
+
put_device(&rmtfs_mem->dev);
 
return 0;
@@ -156,6 +169,7 @@ static int qcom_rmtfs_mem_probe(struct platform_device 
*pdev)

Re: [PATCH 6/9] gpio: Add support for hierarchical IRQ domains

2018-09-25 Thread Linus Walleij
Hi Thierry!

Thanks for the patch!

I am a bit ignorant about irqdomains so I will probably need an ACK
from some irq maintainer before I can apply this.

On Fri, Sep 21, 2018 at 12:25 PM Thierry Reding
 wrote:

> From: Thierry Reding 
>
> Hierarchical IRQ domains can be used to stack different IRQ controllers
> on top of each other. One specific use-case where this can be useful is
> if a power management controller has top-level controls for wakeup
> interrupts. In such cases, the power management controller can be a
> parent to other interrupt controllers and program additional registers
> when an IRQ has its wake capability enabled or disabled.
>
> Signed-off-by: Thierry Reding 

While I think it is really important that we start supporting hierarchical
irqdomains in the gpiolib core, I want a more complete approach,
so that drivers that need hierarchical handling of irqdomains
can get the same support from gpiolib as they get for simple
domains.

> @@ -1918,7 +1918,9 @@ static int gpiochip_add_irqchip(struct gpio_chip 
> *gpiochip,
> type = IRQ_TYPE_NONE;
> }
>
> -   gpiochip->to_irq = gpiochip_to_irq;
> +   if (!gpiochip->to_irq)
> +   gpiochip->to_irq = gpiochip_to_irq;

So here you let the drivers override the .to_irq() function and that
I think gets confusing as we are asking gpiolib to handle our
irqchip.


> -   gpiochip->irq.domain = irq_domain_add_simple(np, gpiochip->ngpio,
> -gpiochip->irq.first,
> -ops, gpiochip);
> +   if (gpiochip->irq.parent_domain)
> +   gpiochip->irq.domain = 
> irq_domain_add_hierarchy(gpiochip->irq.parent_domain,
> +   0, 
> gpiochip->ngpio,
> +   np, ops, 
> gpiochip);
> +   else
> +   gpiochip->irq.domain = irq_domain_add_simple(np, 
> gpiochip->ngpio,
> +
> gpiochip->irq.first,
> +ops, gpiochip);

So this part is great: if we pass in a parent domain the core helps us
create the hierarchy.

But the stuff in .to_irq() should also be handled in the gpiolib core:
the irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &spec) for
example. That way you should not need any external .to_irq() function.

I can't see if you need to pull more stuff into the core to accomplish
that, but I think in essence the core gpiolib needs to be more helpful
with hierarchies.

Yours,
Linus Walleij


Re: [PATCH v5 01/27] arm64: cpufeature: Set SYSREG_GIC_CPUIF as a boot system feature

2018-09-25 Thread Marc Zyngier

On 25/09/18 04:10, Yao Lihua wrote:

Hi Marc, Julien,


On 09/21/2018 11:56 PM, Marc Zyngier wrote:

On Tue, 28 Aug 2018 16:51:11 +0100,
Julien Thierry  wrote:

Signed-off-by: Julien Thierry 
Suggested-by: Daniel Thompson 
Cc: Catalin Marinas 
Cc: Will Deacon 
Cc: Suzuki K Poulose 
Cc: Marc Zyngier 
---
  arch/arm64/kernel/cpufeature.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e238b79..1e433ac 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1039,7 +1039,7 @@ static void cpu_has_fwb(const struct 
arm64_cpu_capabilities *__unused)
{
.desc = "GIC system register CPU interface",
.capability = ARM64_HAS_SYSREG_GIC_CPUIF,
-   .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+   .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
.matches = has_useable_gicv3_cpuif,
.sys_reg = SYS_ID_AA64PFR0_EL1,
.field_pos = ID_AA64PFR0_GIC_SHIFT,
--
1.9.1


This definitely deserves a commit message, such as:

"We do not support systems where some CPUs have an operational GICv3
  CPU interface, and some don't. Let's make this requirement obvious by
  flagging the GICv3 capability as being strict."

May I ask if it is possible to implement pseudo-NMI on an arm64 SoC with GIC-400?


In theory, yes. In practice, this is likely to be both hard to implement 
(you need to discover the GIC CPU interface address very early so that 
you can patch the PMR flipping code at the right time), and pretty 
bad from a performance point of view (MMIO accesses are likely to be slow).


Given the above, the incentive to support such a configuration is close 
to zero.


Thanks,

M.
--
Jazz is not dead. It just smells funny...


Re: [PATCH v5 0/5] sched/deadline: fix cpusets bandwidth accounting

2018-09-25 Thread Juri Lelli
Hi,

On 03/09/18 16:27, Juri Lelli wrote:
> Hi,
> 
> v5 of a series of patches, originally authored by Mathieu, with the intent
> of fixing a long standing issue of SCHED_DEADLINE bandwidth accounting.
> As originally reported by Steve [1], when hotplug and/or (certain)
> cpuset reconfiguration operations take place, DEADLINE bandwidth
> accounting information is lost since root domains are destroyed and
> recreated.
> 
> Mathieu's approach is based on restoring bandwidth accounting info on
> the newly created root domains by iterating through the (DEADLINE) tasks
> belonging to the configured cpuset(s).
> 
> Main problem of v4 was caused by the trylocking of cpuset_mutex. As
> noticed by Steve [2], if multiple tasks are created at the same time
> only the first gets to grab the mutex, the others get -EBUSY and need to
> retry. Not really nice. So, in v5 I'm proposing to use callback_lock
> instead of cpuset_mutex, which AFAIU should be enough to grant read-only
> safe access to cpusets.
> 
> 01/05 has been dropped because it wasn't really adding much and was
> only causing false positives.
> 
> 05/05 is still too much DEADLINE specific I guess, but let's first agree
> on foundations patches.
> 
> Set also available at
> 
>  https://github.com/jlelli/linux.git fixes/deadline/root-domain-accounting-v5

Gentle ping about this.

Best,

- Juri


Re: [BISECTED] Regression: Solidrun Clearfog Base won't boot since "PCI: mvebu: Only remap I/O space if configured"

2018-09-25 Thread Andrew Murray
Hi Thomas,

On Mon, Sep 24, 2018 at 02:12:03PM +0200, Thomas Petazzoni wrote:
> Hello,
> 
> On Mon, 24 Sep 2018 12:13:41 +0100, Russell King - ARM Linux wrote:
> 
> > > But being able to unmap it would also be needed to be able to remove
> > > PCI host controller drivers, and therefore compile them as module, and
> > > make them more like any other drivers.
> > > 
> > > I'm not sure why we need to guarantee that the I/O space is always
> > > mapped:
> > > 
> > >  - It isn't mapped before the PCI controller driver does the mapping.
> > > 
> > >  - There is no reason for it to be accessed when the PCI controller
> > >driver is not initialized: PCI devices can only be probed and
> > >initialized when the PCI controller driver is probed/initialized.  
> > 
> > There are historic reasons.  PCI provides ISA IO space, and when you
> > have a machine with ISA peripherals present, the PCI IO space must
> > never be unmapped - if it is, ISA drivers will oops the kernel.  There
> > is no way for a vanishing PCI controller to cause ISA drivers to be
> > unbound.
> > 
> > If you have a host controller that does unmap PCI IO space and you have
> > ISA peripherals with drivers present, unbinding the PCI host controller
> > will remove the IO space mapping, and next time an ISA peripheral
> > touches IO space, the kernel will oops.
> 
> Thanks for sharing some additional technical context on this, very
> useful.
> 
> I have another question though: shouldn't those ISA devices be child
> devices of the PCI controller, if they use some resources of the PCI
> controller ? Could you give an example of such an ISA device driver ?

Legacy VGA also falls into this category - for example
drivers/video/console/vgacon.c will happily use outb/inb macros to hard
coded addresses which are hoped to be present on some PCI/ISA bus.

With regards to ISA drivers - take a look for anything that registers with
isa_register_driver - for example:

drivers/input/touchscreen/htcpen.c
drivers/net/ethernet/3com/3c509.c
drivers/watchdog/ebc-c384_wdt.c

None of these drivers do any kind of mapping before attempting to access
these addresses.

Thanks,

Andrew Murray

> This is just to understand better the issue, because there seems to be
> a kind of hidden dependency between those ISA drivers and the setup of
> the PCI controller.
> 
> > > All other drivers, including on ARM, use pci_remap_iospace(), which
> > > does provide the pci_unmap_iospace() counter part.  
> > 
> > ... which has been created in PCI land just to deal with PCI without
> > regard for the above issue.
> > 
> > However, there's another issue I missed - if you _do_ have ISA
> > peripherals, you likely want the IO space setup from very early on,
> > and you won't be using the new fangled PCI host driver support anyway.
> > That uses pci_map_io_early() rather than pci_ioremap_io() or
> > pci_remap_io().
> 
> OK. There's today a single platform (Footbridge) that uses
> pci_map_io_early(), and it is indeed called through the ->map_io()
> hook, which is very early in the boot process.
> 
> BTW, look at drivers/pcmcia/at91_cf.c. It has ->probe() and ->remove(),
> and does a pci_ioremap_io() in its ->probe(), and nothing in its
> ->remove(). I don't think this driver, compiled as a module, will work
> well after an insmod/rmmod/insmod cycle.
> 
> > > But to me, the general direction is that the ARM-specific
> > > pci_remap_io() API is fading away, and its replacement already provides
> > > an unmapping capability. So why not add the same unmapping capability
> > > to pci_remap_io() ?  
> > 
> > Yes, that would be a good longer term plan - we don't need three
> > different ways to map PCI IO space, but it is development.
> 
> Absolutely. Glad to hear that you agree on the longer term plan.
> 
> > > But we have a regression and we need to fix it. Do you suggest to not
> > > use the new pci_host_probe() API ?  
> > 
> > Well, arguably, the patch that caused the regression is the buggy patch,
> > _not_ the lack of unmapping API for pci_ioremap_io().
> 
> Totally true.
> 
> > Trying to address a regression with further development means that
> > _that_ development needs thought and review, which is a slower
> > process.
> > 
> > I do understand the desire to keep moving forward and never take a
> > step backwards, but sometimes backwards steps are the best way to
> > resolve a regression.  But I also do appreciate that a simple revert
> > in this case is not possible.
> 
> Well, I can revert:
> 
> 42342073e38b50113354944cd51dcfed28d857a1 PCI: mvebu: Convert to use pci_host_bridge directly
> ee1604381a371b3ea6aec7d5e43b6e3f5e153854 PCI: mvebu: Only remap I/O space if configured
> 
> so it's not a big deal either. I can revert those, and then resubmit a
> more complete series later on that moves pci-mvebu to use
> pci_remap_iospace().
> 
> > I'll accept your patch on the condition that the ARM private
> > pci_ioremap_io() will go away in the very near future (please _try_
> > to get ag

Re: [PATCH] mtd: spi-nor: cadence-quadspi: Use proper enum for dma_unmap_single

2018-09-25 Thread Tudor Ambarus



On 09/25/2018 10:34 AM, Nathan Chancellor wrote:
> On Tue, Sep 25, 2018 at 10:24:04AM +0300, Tudor Ambarus wrote:
>> Hi, Nathan,
>>
>> On 09/21/2018 01:29 PM, Nathan Chancellor wrote:
>>> Clang warns when one enumerated type is converted implicitly to another.
>>>
>>> drivers/mtd/spi-nor/cadence-quadspi.c:962:47: warning: implicit
>>> conversion from enumeration type 'enum dma_transfer_direction' to
>>> different enumeration type 'enum dma_data_direction' [-Wenum-conversion]
>>> dma_dst = dma_map_single(nor->dev, buf, len, DMA_DEV_TO_MEM);
>>>   ~~~^~~
>>> ./include/linux/dma-mapping.h:428:66: note: expanded from macro
>>> 'dma_map_single'
>>> #define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, 0)
>>>  ^
>>> drivers/mtd/spi-nor/cadence-quadspi.c:997:43: warning: implicit
>>> conversion from enumeration type 'enum dma_transfer_direction' to
>>> different enumeration type 'enum dma_data_direction' [-Wenum-conversion]
>>> dma_unmap_single(nor->dev, dma_dst, len, DMA_DEV_TO_MEM);
>>> ~^~~
>>> ./include/linux/dma-mapping.h:429:70: note: expanded from macro
>>> 'dma_unmap_single'
>>> #define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, 0)
>>>  ~~  ^
>>> 2 warnings generated.
>>>
>>> Use the proper enums from dma_data_direction to satisfy Clang.
>>>
>>> DMA_TO_DEVICE = DMA_MEM_TO_DEV = 1
>>> DMA_FROM_DEVICE = DMA_DEV_TO_MEM = 2
>>>
>>> Link: https://github.com/ClangBuiltLinux/linux/issues/108
>>> Signed-off-by: Nathan Chancellor 
>>> ---
>>>  drivers/mtd/spi-nor/cadence-quadspi.c | 4 ++--
>>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c 
>>> b/drivers/mtd/spi-nor/cadence-quadspi.c
>>> index 8e714fbfa521..d0e57ac81098 100644
>>> --- a/drivers/mtd/spi-nor/cadence-quadspi.c
>>> +++ b/drivers/mtd/spi-nor/cadence-quadspi.c
>>> @@ -959,7 +959,7 @@ static int cqspi_direct_read_execute(struct spi_nor 
>>> *nor, u_char *buf,
>>> return 0;
>>> }
>>>  
>>> -   dma_dst = dma_map_single(nor->dev, buf, len, DMA_DEV_TO_MEM);
>>> +   dma_dst = dma_map_single(nor->dev, buf, len, DMA_FROM_DEVICE);
>>> if (dma_mapping_error(nor->dev, dma_dst)) {
>>> dev_err(nor->dev, "dma mapping failed\n");
>>> return -ENOMEM;
>>> @@ -994,7 +994,7 @@ static int cqspi_direct_read_execute(struct spi_nor 
>>> *nor, u_char *buf,
>>> }
>>>  
>>>  err_unmap:
>>> -   dma_unmap_single(nor->dev, dma_dst, len, DMA_DEV_TO_MEM);
>>> +   dma_unmap_single(nor->dev, dma_dst, len, DMA_TO_DEVICE);
>>
>> Should have used DMA_FROM_DEVICE, as you did above. Otherwise looks good.
>>
>> Cheers,
>> ta
> 
> Thank you very much for catching this, did several of these conversions
> back to back and didn't look closely at this. I just sent a v2, I
> appreciate the review!
I guess I have some problems with my email server. I receive just some of the
emails sent to linux-...@lists.infradead.org. I haven't received your v2 and I
can't add my Reviewed-by tag there. But I checked v2 on
https://patchwork.ozlabs.org/patch/974269/, looks good, so:

Reviewed-by: Tudor Ambarus  for v2! :)

Best,
ta


Re: [RFC PATCH i2c-next 1/2] dt-bindings: i2c: aspeed: Add 'idle-wait-timeout-ms' setting

2018-09-25 Thread Wolfram Sang
On Mon, Sep 24, 2018 at 03:15:46PM -0700, Jae Hyun Yoo wrote:
> Hi Wolfram,
> 
> On 9/24/2018 2:58 PM, Wolfram Sang wrote:
> > On Tue, Sep 18, 2018 at 11:02:54AM -0700, Jae Hyun Yoo wrote:
> > > On 9/10/2018 2:45 PM, Jae Hyun Yoo wrote:
> > > > +- idle-wait-timeout-ms : bus idle waiting timeout in milliseconds when
> > > > + multi-master is set, defaults to 100 ms when 
> > > > not
> > > > + specified.
> > > 
> > > Will change it to 'aspeed,idle-wait-timeout-ms' as it's a non standard
> > > property.
> > 
> > No need. This binding is not a HW description, so not a DT property in
> > my book. I still don't understand: Your IP core in master mode does not
> > have a BUSY bit or similar which detects when a START was detected and
> > clears after a STOP?
> > 
> 
> Okay, I'll keep this property as it is then.

Sorry for the misunderstanding. I don't think this is a property at all.
It doesn't describe the hardware, it is more of a configuration thing,
or?



signature.asc
Description: PGP signature


[PATCH v5 2/2] pci: dwc: pci-dra7xx: Enable errata i870 for both EP and RC mode

2018-09-25 Thread Vignesh R
Errata i870 is applicable in both EP and RC mode. Therefore rename
function dra7xx_pcie_ep_unaligned_memaccess(), that implements errata
workaround, to dra7xx_pcie_unaligned_memaccess() and call it for both RC
and EP. Make sure driver probe does not fail in case workaround is not
applied for RC mode so as to maintain DT backward compatibility.

Reported-by: Chris Welch 
Signed-off-by: Vignesh R 
Acked-by: Kishon Vijay Abraham I 
---
 drivers/pci/controller/dwc/pci-dra7xx.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/controller/dwc/pci-dra7xx.c 
b/drivers/pci/controller/dwc/pci-dra7xx.c
index ce9224a36f62..a32d6dde7a57 100644
--- a/drivers/pci/controller/dwc/pci-dra7xx.c
+++ b/drivers/pci/controller/dwc/pci-dra7xx.c
@@ -542,7 +542,7 @@ static const struct of_device_id of_dra7xx_pcie_match[] = {
 };
 
 /*
- * dra7xx_pcie_ep_unaligned_memaccess: workaround for AM572x/AM571x Errata i870
+ * dra7xx_pcie_unaligned_memaccess: workaround for AM572x/AM571x Errata i870
  * @dra7xx: the dra7xx device where the workaround should be applied
  *
  * Access to the PCIe slave port that are not 32-bit aligned will result
@@ -552,7 +552,7 @@ static const struct of_device_id of_dra7xx_pcie_match[] = {
  *
  * To avoid this issue set PCIE_SS1_AXI2OCP_LEGACY_MODE_ENABLE to 1.
  */
-static int dra7xx_pcie_ep_unaligned_memaccess(struct device *dev)
+static int dra7xx_pcie_unaligned_memaccess(struct device *dev)
 {
int ret;
struct device_node *np = dev->of_node;
@@ -704,6 +704,11 @@ static int __init dra7xx_pcie_probe(struct platform_device 
*pdev)
 
dra7xx_pcie_writel(dra7xx, PCIECTRL_TI_CONF_DEVICE_TYPE,
   DEVICE_TYPE_RC);
+
+   ret = dra7xx_pcie_unaligned_memaccess(dev);
+   if (ret)
+   dev_err(dev, "WA for Errata i870 not applied\n");
+
ret = dra7xx_add_pcie_port(dra7xx, pdev);
if (ret < 0)
goto err_gpio;
@@ -717,7 +722,7 @@ static int __init dra7xx_pcie_probe(struct platform_device 
*pdev)
dra7xx_pcie_writel(dra7xx, PCIECTRL_TI_CONF_DEVICE_TYPE,
   DEVICE_TYPE_EP);
 
-   ret = dra7xx_pcie_ep_unaligned_memaccess(dev);
+   ret = dra7xx_pcie_unaligned_memaccess(dev);
if (ret)
goto err_gpio;
 
-- 
2.19.0



[PATCH v5 1/2] dt-bindings: PCI: dra7xx: Add bindings for unaligned access in host mode

2018-09-25 Thread Vignesh R
Update device tree binding documentation of TI's dra7xx PCI controller
for enabling unaligned mem access as applicable not just in EP mode but
in host mode as well.

Signed-off-by: Vignesh R 
Reviewed-by: Rob Herring 
---
 Documentation/devicetree/bindings/pci/ti-pci.txt | 5 +
 1 file changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/pci/ti-pci.txt 
b/Documentation/devicetree/bindings/pci/ti-pci.txt
index 7f7af3044016..452fe48c4fdd 100644
--- a/Documentation/devicetree/bindings/pci/ti-pci.txt
+++ b/Documentation/devicetree/bindings/pci/ti-pci.txt
@@ -26,6 +26,11 @@ HOST MODE
ranges,
interrupt-map-mask,
interrupt-map : as specified in ../designware-pcie.txt
+ - ti,syscon-unaligned-access: phandle to the syscon DT node. The 1st argument
+  should contain the register offset within syscon
+  and the 2nd argument should contain the bit field
+  for setting the bit to enable unaligned
+  access.
 
 DEVICE MODE
 ===
-- 
2.19.0



[PATCH v5 0/2] pci-dra7xx: Enable errata i870 workaround for RC mode

2018-09-25 Thread Vignesh R
Make the workaround for errata i870 applicable in Host mode as
well (previously it was enabled only for EP mode), as per the errata
documentation: http://www.ti.com/lit/er/sprz450/sprz450.pdf
The driver will continue to work without applying the workaround in host
mode when using an older DT, to maintain backward compatibility.

Tested on DRA72 EVM

Changes since v4:
Fail driver probe if DT binding is absent for EP(as it is in current
code)

Changes since v3:
Rebase onto latest next
Drop DT patches from this series. They will be sent separately once
driver changes in this series are merged.

Changes since v2:
Reorder patch 2 to appear at the last.
Collect all the ACKs

Changes since v1:
Drop IRQ handling rework (will be sent out separately)


Vignesh R (2):
  dt-bindings: PCI: dra7xx: Add bindings for unaligned access in host
mode
  pci: dwc: pci-dra7xx: Enable errata i870 for both EP and RC mode

 Documentation/devicetree/bindings/pci/ti-pci.txt |  5 +
 drivers/pci/controller/dwc/pci-dra7xx.c  | 11 ---
 2 files changed, 13 insertions(+), 3 deletions(-)

-- 
2.19.0



Re: [PATCH] rpmsg: core: add support to power domains for devices

2018-09-25 Thread Srinivas Kandagatla

Hi Suman,

On 25/09/18 01:25, Suman Anna wrote:

Hi Srinivas,

On 06/15/2018 04:59 AM, Srinivas Kandagatla wrote:

Some of the rpmsg devices need to switch on power domains to communicate
with remote processor. For example on Qualcomm DB820c platform LPASS
power domain needs to be switched on for any kind of audio services.
This patch adds the missing power domain support in rpmsg core.

Without this patch attempting to play audio via QDSP on DB820c would
reboot the system.

Signed-off-by: Srinivas Kandagatla 
---
  drivers/rpmsg/rpmsg_core.c | 7 +++
  1 file changed, 7 insertions(+)

diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c
index b714a543a91d..8122807db380 100644
--- a/drivers/rpmsg/rpmsg_core.c
+++ b/drivers/rpmsg/rpmsg_core.c
@@ -15,6 +15,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  
  #include "rpmsg_internal.h"

@@ -449,6 +450,10 @@ static int rpmsg_dev_probe(struct device *dev)
struct rpmsg_endpoint *ept = NULL;
int err;
  
+	err = dev_pm_domain_attach(dev, true);

+   if (err)
+   goto out;


This patch has broken the virtio-rpmsg stack based rpmsg devices. These
devices are non-DT and the rpmsg_dev_probe() is now failing with -19
(-ENODEV) error code.



There seems to be some dependency on this patch ( 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h=v4.19-rc5&id=919b7308fcc452cd4e282bab389c33384a9f3790 
) which seems to rework the return values from acpi_dev_pm_attach() and 
genpd_dev_pm_attach()




--srini



Loic, Arnaud,
Can one of you double-confirm this behavior on ST platforms as well?
The patch came through 4.14.71 stable release and broke our downstream
kernels.

regards
Suman


+
if (rpdrv->callback) {
strncpy(chinfo.name, rpdev->id.name, RPMSG_NAME_SIZE);
chinfo.src = rpdev->src;
@@ -490,6 +495,8 @@ static int rpmsg_dev_remove(struct device *dev)
  
  	rpdrv->remove(rpdev);
  
+	dev_pm_domain_detach(dev, true);

+
if (rpdev->ept)
rpmsg_destroy_ept(rpdev->ept);
  





Re: [PATCH] pinctrl: icelake: Fix the resource number for community-4/5

2018-09-25 Thread Andy Shevchenko
On Tue, Sep 25, 2018 at 12:32 AM Rajat Jain  wrote:
> On Mon, Sep 24, 2018 at 2:09 PM Andy Shevchenko
>  wrote:
> > On Mon, Sep 24, 2018 at 8:04 PM Rajat Jain  wrote:
> > > On Mon, Sep 24, 2018 at 7:54 AM Banik, Subrata  
> > > wrote:

> > > > First of all, this is pre-production chip, so, I don't think there is a 
> > > > bug in the driver (yet) discovered.
> > > >
> > > > Looking at the above ASL code I may conclude that it definitely is 
> > > > *not* from our reference BIOS.
> > > > I have checked two versions of it and found that in both we have the 
> > > > following mapping:
> > > > for LP variant: there are only 4 communities are exported for H 
> > > > variant: there are only 5 communities are exported
> > > >
> > > > So, I guess the problem is in ASL code you provided. It simply should 
> > > > not export that community at all.
> > > >
> > > > In case you need to do so, there are ways:
> > > >  - contact Intel and ask for a change in reference BIOS
> > > >  - acquire another ACPI ID for the case, or, perhaps use special 
> > > > constants like
> > > >_HRV for that purpose (also need to contact Intel while doing that)

> > > Andy, can you please let us know in what order are the resources laid
> > > out in your reference BIOS for the case when it exports 5 communities
> > > (i.e. community-0-2, 4-5)?
> >
> > We have no hardware with such PCH to answer to this question. LP
> > version exports only 4 communities and order of them as per existing
> > driver.
>
> Got it, thanks. From your response earlier:
>
> > Looking to the above ASL code I may conclude that is definitely is *not* 
> > from our reference BIOS.
> > I have checked two versions of it and found that in both we have the 
> > following mapping:
> > for LP variant: there are only 4 communities are exported
> > for H variant: there are only 5 communities are exported
>
> 1) Do you know or recall, the order of communities in ACPI in the H
> variant? Of course, this is a request for help for getting the info.

I can't say anything about hardware I didn't see.
Even if BIOS code has something, it's not fully guaranteed that in
production it will be same. Better to ask Intel BIOS team for the
details.

> 2) Trying to understand how would the kernel support both LP and H
> variant. Is it the assumption that the H variant will have a different
> ACPI ID than the LP variant  (i.e. not "INT3455")? Because it will
> also have the same problem that we are seeing I think.

Yes, definitely they have different IDs. You may consult with
pinctrl-cannonlake.c driver (better from latest possible kernel
version like v4.19-rc5) to see how it's implemented.
Moreover, LP and H variants have different pin lists inside
communities, so, they can't be substituted and one may consider them
as different IPs.

-- 
With Best Regards,
Andy Shevchenko


[PATCH v3 0/2] staging: bcm2835-camera: Clean up completed TODO

2018-09-25 Thread Aymen Qader
Update the bcm2835-camera driver to remove a TODO and an unused header
include. These were made unnecessary in

commit 7e8dbea41032 ("staging: bcm2835-camera: Remove explicit cache
flush operations")

Aymen Qader (2):
  staging: bcm2835-camera: Update TODO
  staging: bcm2835-camera: Remove unused header

--
v2: Make commit message clearer
v3: Separate patch into two, clean up commit messages

 drivers/staging/vc04_services/bcm2835-camera/TODO | 6 --
 drivers/staging/vc04_services/bcm2835-camera/mmal-vchiq.c | 1 -
 2 files changed, 7 deletions(-)

-- 
2.17.1



[PATCH v3 1/2] staging: bcm2835-camera: Update TODO

2018-09-25 Thread Aymen Qader
Remove the TODO item to remove manual cache flushing from bulk_receive.
This was previously done in this commit:

commit 7e8dbea41032 ("staging: bcm2835-camera: Remove explicit cache
flush operations")

Signed-off-by: Aymen Qader 
---
 drivers/staging/vc04_services/bcm2835-camera/TODO | 6 --
 1 file changed, 6 deletions(-)

diff --git a/drivers/staging/vc04_services/bcm2835-camera/TODO 
b/drivers/staging/vc04_services/bcm2835-camera/TODO
index cefce72d814f..6c2b4ffe4996 100644
--- a/drivers/staging/vc04_services/bcm2835-camera/TODO
+++ b/drivers/staging/vc04_services/bcm2835-camera/TODO
@@ -15,9 +15,3 @@ padding in the V4L2 spec, but that padding doesn't match what 
the
 hardware can do.  If we exposed the native padding requirements
 through the V4L2 "multiplanar" formats, the firmware would have one
 less copy it needed to do.
-
-3) Port to ARM64
-
-The bulk_receive() does some manual cache flushing that are 32-bit ARM
-only, which we should convert to proper cross-platform APIs.
-
-- 
2.17.1



[PATCH v3 1/2] staging: bcm2835-camera: Update TODO

2018-09-25 Thread Aymen Qader
Remove the TODO item to remove manual cache flushing from bulk_receive.
This was previously done in this commit:

commit 7e8dbea41032 ("staging: bcm2835-camera: Remove explicit cache
flush operations")

Signed-off-by: Aymen Qader 
---
 drivers/staging/vc04_services/bcm2835-camera/TODO | 6 --
 1 file changed, 6 deletions(-)

diff --git a/drivers/staging/vc04_services/bcm2835-camera/TODO 
b/drivers/staging/vc04_services/bcm2835-camera/TODO
index cefce72d814f..6c2b4ffe4996 100644
--- a/drivers/staging/vc04_services/bcm2835-camera/TODO
+++ b/drivers/staging/vc04_services/bcm2835-camera/TODO
@@ -15,9 +15,3 @@ padding in the V4L2 spec, but that padding doesn't match what 
the
 hardware can do.  If we exposed the native padding requirements
 through the V4L2 "multiplanar" formats, the firmware would have one
 less copy it needed to do.
-
-3) Port to ARM64
-
-The bulk_receive() does some manual cache flushing that are 32-bit ARM
-only, which we should convert to proper cross-platform APIs.
-
-- 
2.17.1



[PATCH v3 0/2] staging: bcm2835-camera: Clean up completed TODO

2018-09-25 Thread Aymen Qader
Update the bcm2835-camera driver to remove a TODO and an unused header
include. These were made unnecessary in

commit 7e8dbea41032 ("staging: bcm2835-camera: Remove explicit cache
flush operations")

Aymen Qader (2):
  staging: bcm2835-camera: Update TODO
  staging: bcm2835-camera: Remove unused header

--
v2: Make commit message clearer
v3: Separate patch into two, clean up commit messages

 drivers/staging/vc04_services/bcm2835-camera/TODO | 6 --
 drivers/staging/vc04_services/bcm2835-camera/mmal-vchiq.c | 1 -
 2 files changed, 7 deletions(-)

-- 
2.17.1



[PATCH v3 2/2] staging: bcm2835-camera: Remove unused header

2018-09-25 Thread Aymen Qader
Remove the asm/cacheflush.h header include from mmal-vchiq.c. This
dependency was removed in:

commit 7e8dbea41032 ("staging: bcm2835-camera: Remove explicit cache
flush operations")

Signed-off-by: Aymen Qader 
---
 drivers/staging/vc04_services/bcm2835-camera/mmal-vchiq.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/staging/vc04_services/bcm2835-camera/mmal-vchiq.c 
b/drivers/staging/vc04_services/bcm2835-camera/mmal-vchiq.c
index 51e5b04ff0f5..6e5c1d4ee122 100644
--- a/drivers/staging/vc04_services/bcm2835-camera/mmal-vchiq.c
+++ b/drivers/staging/vc04_services/bcm2835-camera/mmal-vchiq.c
@@ -21,7 +21,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 
 #include "mmal-common.h"
-- 
2.17.1



[PATCH v3 2/2] staging: bcm2835-camera: Remove unused header

2018-09-25 Thread Aymen Qader
Remove the asm/cacheflush.h header include from mmal-vchiq.c. This
dependency was removed in:

commit 7e8dbea41032 ("staging: bcm2835-camera: Remove explicit cache
flush operations")

Signed-off-by: Aymen Qader 
---
 drivers/staging/vc04_services/bcm2835-camera/mmal-vchiq.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/staging/vc04_services/bcm2835-camera/mmal-vchiq.c 
b/drivers/staging/vc04_services/bcm2835-camera/mmal-vchiq.c
index 51e5b04ff0f5..6e5c1d4ee122 100644
--- a/drivers/staging/vc04_services/bcm2835-camera/mmal-vchiq.c
+++ b/drivers/staging/vc04_services/bcm2835-camera/mmal-vchiq.c
@@ -21,7 +21,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 
 #include "mmal-common.h"
-- 
2.17.1



Re: [PATCH] ASoC: soc-utils: Rename dummy_dma_ops to snd_dummy_ops

2018-09-25 Thread Andy Shevchenko
On Tue, Sep 25, 2018 at 3:48 AM Matthias Kaehlcke  wrote:
>
> The symbol 'dummy_dma_ops' is declared with different data types by
> sound/soc/soc-utils.c and arch/arm64/include/asm/dma-mapping.h. This
> leads to conflicts when soc-utils.c (indirectly) includes dma-mapping.h:
>
> sound/soc/soc-utils.c:282:33: error: conflicting types for 'dummy_dma_ops'
>   static const struct snd_pcm_ops dummy_dma_ops = {
>   ^
> ...
> arch/arm64/include/asm/dma-mapping.h:27:33: note: previous declaration of 
> 'dummy_dma_ops' was here
>   extern const struct dma_map_ops dummy_dma_ops;
>   ^
>
> Rename the symbol in soc-utils.c to 'snd_dummy_ops' to avoid the conflict.

But it misses 'dma' part as I can see they are still DMA related.

snd_dummy_dma_ops ?

>
> Signed-off-by: Matthias Kaehlcke 
> ---
>  sound/soc/soc-utils.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c
> index e0c93496c0cd..ab44133bc0bf 100644
> --- a/sound/soc/soc-utils.c
> +++ b/sound/soc/soc-utils.c
> @@ -273,13 +273,13 @@ static int dummy_dma_open(struct snd_pcm_substream 
> *substream)
> return 0;
>  }
>
> -static const struct snd_pcm_ops dummy_dma_ops = {
> +static const struct snd_pcm_ops snd_dummy_ops = {
> .open   = dummy_dma_open,
> .ioctl  = snd_pcm_lib_ioctl,
>  };
>
>  static const struct snd_soc_component_driver dummy_platform = {
> -   .ops = &dummy_dma_ops,
> +   .ops = &snd_dummy_ops,
>  };
>
>  static const struct snd_soc_component_driver dummy_codec = {
> --
> 2.19.0.605.g01d371f741-goog
>


-- 
With Best Regards,
Andy Shevchenko


[PATCH 3/3] split original if condition code in function bch_cache_set_alloc

2018-09-25 Thread Dongbo Cao
remove bch_cache_set_unregister because the cache set has not been registered yet

Signed-off-by: Dongbo Cao 
---
 drivers/md/bcache/super.c | 102 ++
 1 file changed, 82 insertions(+), 20 deletions(-)

diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 47d122ed..13a128eb 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1658,9 +1658,13 @@ void bch_cache_set_unregister(struct cache_set *c)
 struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 {
int iter_size;
+   const char *err = NULL;
+
struct cache_set *c = kzalloc(sizeof(struct cache_set), GFP_KERNEL);
-   if (!c)
-   return NULL;
+   if (!c) {
+   err = "cache_set alloc failed";
+   goto err_cache_set_alloc;
+   }
 
__module_get(THIS_MODULE);
closure_init(&c->cl, NULL);
@@ -1715,22 +1719,55 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb 
*sb)
iter_size = (sb->bucket_size / sb->block_size + 1) *
sizeof(struct btree_iter_set);
 
-   if (!(c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL)) ||
-   mempool_init_slab_pool(&c->search, 32, bch_search_cache) ||
-   mempool_init_kmalloc_pool(&c->bio_meta, 2,
- sizeof(struct bbio) + sizeof(struct 
bio_vec) *
- bucket_pages(c)) ||
-   mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
-   bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio),
-   BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER) ||
-   !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
-   !(c->moving_gc_wq = alloc_workqueue("bcache_gc",
-   WQ_MEM_RECLAIM, 0)) ||
-   bch_journal_alloc(c) ||
-   bch_btree_cache_alloc(c) ||
-   bch_open_buckets_alloc(c) ||
-   bch_bset_sort_state_init(&c->sort, ilog2(c->btree_pages)))
-   goto err;
+   if (!(c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL))) {
+   err = "c->devices alloc failed";
+   goto err_devices_alloc;
+   }
+   if (mempool_init_slab_pool(&c->search, 32, bch_search_cache)) {
+   err = "c->search alloc failed";
+   goto err_search_alloc;
+   }   
+   if (mempool_init_kmalloc_pool(&c->bio_meta, 2,
+   sizeof(struct bbio) + sizeof(struct bio_vec) *
+   bucket_pages(c))) {
+   err = "c->bio_meta alloc failed";
+   goto err_bio_meta_alloc;
+   }
+   if (mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size)) {
+   err = "c->fill_iter alloc failed";
+   goto err_fill_iter_alloc;
+   }
+   if (bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio),
+   BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER)) {
+   err = "c->bio_split init failed";
+   goto err_bio_split_init;
+   }
+   if (!(c->uuids = alloc_bucket_pages(GFP_KERNEL, c))) {
+   err = "c->uuids alloc failed";
+   goto err_uuids_alloc;
+   }
+   if (!(c->moving_gc_wq = alloc_workqueue("bcache_gc",
+   WQ_MEM_RECLAIM, 0))) {
+   err = "c->moving_gc_wq alloc failed";
+   goto err_moving_gc_wq_alloc;
+   }
+   if (bch_journal_alloc(c)) {
+   err = "bch_journal_alloc failed";
+   goto err_bch_journal_alloc;
+
+   }
+   if (bch_btree_cache_alloc(c)) {
+   err = "bch_btree_cache_alloc failed";
+   goto err_bch_btree_cache_alloc;
+   }
+   if (bch_open_buckets_alloc(c)) {
+   err = "bch_open_buckets_alloc failed";
+   goto err_bch_open_buckets_alloc;
+   }
+   if (bch_bset_sort_state_init(&c->sort, ilog2(c->btree_pages))) {
+   err = "bch_bset_sort_state_init failed";
+   goto err_bch_bset_sort_state_init;
+   }
 
c->congested_read_threshold_us  = 2000;
c->congested_write_threshold_us = 2;
@@ -1738,8 +1775,33 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb 
*sb)
WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
 
return c;
-err:
-   bch_cache_set_unregister(c);
+
+err_bch_bset_sort_state_init:
+   bch_open_buckets_free(c);
+err_bch_open_buckets_alloc:
+   bch_btree_cache_free(c);
+err_bch_btree_cache_alloc:
+   bch_journal_free(c);
+err_bch_journal_alloc:
+   destroy_workqueue(c->moving_gc_wq);
+err_moving_gc_wq_alloc:
+   free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
+err_uuids_alloc:
+   bioset_exit(&c->bio_split);
+err_bio_split_init:
+   mempool_exit(&c->fill_iter);
+err_fill_iter_alloc:
+   mempool_exit(&c->bio_meta);
+err_bio

Re: [PATCH] clk: tegra: Return the exact clock rate from clk_round_rate

2018-09-25 Thread Peter De Schrijver
On Mon, Sep 24, 2018 at 03:18:04PM -0400, r yang wrote:
> On Mon, Sep 24, 2018 at 11:08:03AM +0300, Peter De Schrijver wrote:
> > On Fri, Sep 21, 2018 at 06:01:49PM -0400, ryang wrote:
> > > The current behavior is that clk_round_rate would return the same clock
> > > rate passed to it for valid PLL configurations. This change will return
> > > the exact rate the PLL will provide in accordance with clk API.
> > > 
> > > Signed-off-by: ryang 
> > > ---
> > >  drivers/clk/tegra/clk-pll.c | 7 ++-
> > >  1 file changed, 6 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/clk/tegra/clk-pll.c b/drivers/clk/tegra/clk-pll.c
> > > index 17a058c3bbc1..36014a6ec42e 100644
> > > --- a/drivers/clk/tegra/clk-pll.c
> > > +++ b/drivers/clk/tegra/clk-pll.c
> > > @@ -595,7 +595,12 @@ static int _calc_rate(struct clk_hw *hw, struct 
> > > tegra_clk_pll_freq_table *cfg,
> > >   return -EINVAL;
> > >   }
> > >  
> > > - cfg->output_rate >>= p_div;
> > > + if (cfg->m == 0) {
> > > + cfg->output_rate = 0;
> > 
> > I think a WARN_ON() is appropriate here. the input divider should never be 
> > 0.
> > 
> > Peter.
> > 
> 
> Should it return -EINVAL (or some error) too? _calc_rate is also in the
> clk_set_rate code path. I think we want to avoid programming the
> register to 0 input divider all together?
> 

Yes. writing 0 to the input divider is usually not allowed. In some cases it's
equivalent to writing 1, but better not count on that.

Peter.

> > > + } else {
> > > + cfg->output_rate = cfg->n * DIV_ROUND_UP(parent_rate, cfg->m);
> > > + cfg->output_rate >>= p_div;
> > > + }
> > >  
> > >   if (pll->params->pdiv_tohw) {
> > >   ret = _p_div_to_hw(hw, 1 << p_div);
> > > -- 
> > > 2.17.1
> > > 


Re: [PATCH 0/9] Implement wake event support on Tegra186 and later

2018-09-25 Thread Linus Walleij
Hi Thierry,

thanks for working on the wakeup business!

This patch gets me a bit confused on our different approaches
toward wakeups in the kernel, so I included Lina, Marc and Ulf
to see if we can get some common understanding.

On Fri, Sep 21, 2018 at 12:25 PM Thierry Reding
 wrote:

> The following is a set of patches that allow certain interrupts to be
> used as wakeup sources on Tegra186 and later. To implement this, each
> of the GPIO controllers' IRQ domain needs to become hierarchical, and
> parented to the PMC domain. The PMC domain in turn implements a new
> IRQ domain that is a child to the GIC IRQ domain.
>
> The above ensures that the interrupt chip implementation of the PMC is
> called at the correct time. The ->irq_set_type() and ->irq_set_wake()
> implementations program the PMC wake registers in a way to enable the
> given interrupts as wakeup sources.
>
> This is based on a suggestion from Thomas Gleixner that resulted from
> the following thread:
>
> https://lkml.org/lkml/2018/9/13/1042

I am not sure if you are aware of Lina's series
"Wakeup GPIO support for SDM845 SoC"
that is now in v3:
https://patchwork.kernel.org/cover/10587965/

It appears to me, though I am blissfully ignorant of the
details, that there is a relationship between this patch
series and the other one.

Your approach is to insert a hierarchical PMC irq controller
and Lina's approach is to simply put a mechanism on the
side to inject IRQs into the GIC after sleep (IIUC).

I guess your hierarchy is in response to this mail from tglx:
https://lkml.org/lkml/2018/9/14/339

So from a birds eye point of view I don't see how the Tegra
PMC irq controller and Qualcomm's PDC power domain
controller are conceptually different. Are you doing the same
thing in two different ways for the same problem space
here?

Or are these hardwares so very different that they really
warrant two different approaches to wakeups?

I guess I miss a bit of hardware insight... is the key difference
that in Qualcomm's PDC the IRQs need to be replayed/injected
by software while Tegra's PMC will do this in hardware?

Yours,
Linus Walleij


Re: [PATCH] gpio: mockup: use device properties instead of platform_data

2018-09-25 Thread Linus Walleij
On Mon, Sep 24, 2018 at 9:35 AM Bartosz Golaszewski  wrote:
> niedz., 23 wrz 2018 o 13:17 Bartosz Golaszewski  napisał(a):
> >
> > Some users want to introduce device tree support to the mockup driver.
> > Let's make it easier by switching to using generic device properties.
> > The driver stays compatible with previous use cases and after this
> > conversion there'll be no need to change the way probing of mockup
> > GPIO chips works.
> >
> > Tested with libgpiod test suite.
> >
> > Signed-off-by: Bartosz Golaszewski 
(...)
> Linus, I just noticed that we either need to drop the check for
> -ENOENT or add an else that returns on any other error. I'll need to
> send a v2

OK business as usual.

> but I'll let you first tell me if you like the general idea.

I don't know, I'm a bit ignorant about the idea, scope and
ambition with device properties.

In my book this relates to the fwnode concept, but maybe
I'm wrong?

Mika, please fill us in here. The device properties in gpiolib
came in the patch
"gpio: Rework of_gpiochip_set_names() to use device property accessors"

Yours,
Linus Walleij


Re: INFO: task hung in fsnotify_connector_destroy_workfn (2)

2018-09-25 Thread Petr Mladek
On Mon 2018-09-24 15:19:35, Jiri Kosina wrote:
> On Sun, 16 Sep 2018, Amir Goldstein wrote:
> 
> > > > syzbot found the following crash on:
> > > >
> > > > HEAD commit:11da3a7f84f1 Linux 4.19-rc3
> > > > git tree:   upstream
> > > > console output: https://syzkaller.appspot.com/x/log.txt?x=141ffbca40
> > > > kernel config:  
> > > > https://syzkaller.appspot.com/x/.config?x=9917ff4b798e1a1e
> > > > dashboard link: 
> > > > https://syzkaller.appspot.com/bug?extid=6fb572170402d311dd39
> > > > compiler:   gcc (GCC) 8.0.1 20180413 (experimental)
> > > > syz repro:  
> > > > https://syzkaller.appspot.com/x/repro.syz?x=136a60ae40
> > > >
> > > > IMPORTANT: if you fix the bug, please add the following tag to the 
> > > > commit:
> > > > Reported-by: syzbot+6fb572170402d311d...@syzkaller.appspotmail.com
> > > >
> > >
> > > Since it kept printk()ing for at least 93 seconds, it might have kept for 
> > > 150 seconds.
> > >
> > > [  174.614333] hid-generic ::.0003: unknown main item tag 0x0
> > > [  268.196102] INFO: task kworker/u4:1:23 blocked for more than 140 
> > > seconds.
> > >
> > > Since the reproducer is simple, this might be lockup due to continuous 
> > > printk().
> > > If syzbot can reliably reproduce this problem using the reproducer, try
> > > ratelimiting printk().
> > 
> > Right.. and I was able to reproduce after setting CONFIG_HID_GENERIC=y
> > but unless I am missing something, the core problem doesn't seem related to
> > fsnotify and $SUBJECT, so CCing HID maintainer.
> 
> Alright, so you're basically triggering a never-ending flow of kmsgs being 
> printed out from HID parser due to doing crazy things with the parser, and 
> that causes the issues for the kworker trying to flush them out.

My understanding is that the flow of messages is long but it ends at
some point. For example, the many warnings for
"hid-generic ::.0003" eventually end with

[  221.223267] hid-generic ::.0003: hidraw0:  HID v0.00 
Device [syz1] on syz1


Next, there are usually intermixed messages for two devices, e.g.

[  174.776326] hid-generic ::.0003: unknown main item tag 0x0
[  174.782734] hid-generic ::.0004: unknown main item tag 0x0

Therefore I believe that console_lock is being transferred and no
process is stuck in printk()/console_unlock().


> I guess the patch below fixes it, however the kworker should not really be 
> blocked by this I think ... adding a few more printk folks to double-check 
> why the kworker would get stuck due to massive printk() flood.

It seems that the many printk() calls just delay the entire
process and rcu gets blocked for too long. The messages are
the same all the time. So some rate limiting looks like
a reasonable solution.

Best Regards,
Petr


Re: [PATCH 3.18 104/105] IB/nes: Fix a compiler warning

2018-09-25 Thread Greg Kroah-Hartman
On Mon, Sep 24, 2018 at 10:39:53PM +, Sasha Levin wrote:
> On Mon, Sep 24, 2018 at 11:03:25AM -0700, Joe Perches wrote:
> >On Mon, 2018-09-24 at 19:59 +0200, Greg Kroah-Hartman wrote:
> >> On Mon, Sep 24, 2018 at 09:38:26AM -0700, Joe Perches wrote:
> >> > On Mon, 2018-09-24 at 13:34 +0200, Greg Kroah-Hartman wrote:
> >> > > 3.18-stable review patch.  If anyone has any objections, please let me 
> >> > > know.
> >> >
> >> > Why should this sort of change be applied to a stable release?
> >>
> >> Originally I was just going to drop this as it's not fixing something.
> >>
> >> But it might be, if that macro is used in a if() statement, or something
> >> like that, it could be doing something unintended.
> >
> >No it couldn't.
> >An empty macro is equivalent to a single statement.
> >
> >> So I don't feel like auditing all 500+ instances where this is used,
> >> it's easier to just accept the patch.
> >
> >It's not a bug fix.
> 
> This question came up a few months ago. Greg suggested that we should be
> pulling in warning fixes to get the stable kernels warning-free similar
> to upstream.
> 
> The reasoning behind it was similar to the "no warnings" reasoning of
> upstream: there might be real issues hiding in the sea of "harmless"
> warnings, so we want to get rid of all of them to catch real issues.

No warnings is great, but not when you add the "W=1" option.  That way
lies madness and is not something anyone does on stable kernels.  They
do it on mainline when they want to try to find something to clean up
and get a coding style fix merged :)

thanks,

greg k-h


Re: [PATCH v5 0/6] Move swapper_pg_dir to rodata section.

2018-09-25 Thread Jun Yao
Hi James,

On Fri, Sep 21, 2018 at 11:26:39PM +0100, James Morse wrote:
> Hi Jun,
> 
> On 09/17/2018 05:43 AM, Jun Yao wrote:
> > Version 5 changes:
> > 1. Correct spelling and indentation errors[1].
> > 2. Update init_mm.pgd by assembly[2].
> > 3. Simplify set_p?d() by introducing set_swapper_pgd()[3].
> > 4. Reduce unnecessary tlbi for every write to swapper_pg_dir
> >during paging_init()[3].
> 
> 
> For the series:
> Reviewed-by: James Morse 

Thank you very much for helping me improve the patch and tolerate the
mistakes I made.

Thanks,

Jun


Re: [PATCH] soc: qcom: qdsp6: remove duplicated include from q6adm.c

2018-09-25 Thread zhong jiang
On 2018/9/22 0:22, Mark Brown wrote:
> On Fri, Sep 21, 2018 at 06:24:58PM +0800, zhong jiang wrote:
>> We include wait.h twice in q6adm.c. it is unnecessary. hence remove
>> it.  Further, order the include files as alphabet.
> Please use subject lines matching the style for the subsystem.  This
> makes it easier for people to identify relevant patches.
Got it. Will take note. Thanks.

Sincerely,
zhong jiang



Re: [PATCH 4.9 111/111] MIPS: VDSO: Drop gic_get_usm_range() usage

2018-09-25 Thread Greg Kroah-Hartman
On Tue, Sep 25, 2018 at 11:38:16AM +0800, SZ Lin (林上智) wrote:
> Hi,
> 
> Guenter Roeck  於 2018年9月25日 週二 上午9:36寫道:
> >
> > On Mon, Sep 24, 2018 at 01:53:18PM +0200, Greg Kroah-Hartman wrote:
> > > 4.9-stable review patch.  If anyone has any objections, please let me 
> > > know.
> > >
> >
> > This patch breaks v4.4.y and v4.9.y builds.
> > It includes asm/mips-cps.h which doesn't exist in those releases.
> 
> I am sorry for my fault, thanks for your report.
> 
> Since the patch b025d51873d5fe6 "MIPS: CM: Specify register size when
> generating accessors" which created asm/mips-cps.h is not a bug-fixed
> patch, hence I will not backport this header.
> 
> Hi Greg,
> 
> Could you please help to revert this commit? This commit was intended
> to fix dependency of 70d7783 "MIPS: VDSO: Match data page cache
> colouring when D$ aliases", but I saw 70d7783 was merged before this
> commit; therefore, I don't think it is necessary to keep this commit.
> 
> I apologize for any inconvenience caused, and I will be more careful next time.

Now dropped from the 4.4 and 4.9 queues, thanks.

greg k-h


Re: [PATCH v4 RESEND 3/5] KVM: x86: hyperv: use get_vcpu_by_vpidx() in kvm_hv_flush_tlb()

2018-09-25 Thread Roman Kagan
On Mon, Sep 24, 2018 at 06:55:28PM +0200, Paolo Bonzini wrote:
> On 24/09/2018 18:24, Paolo Bonzini wrote:
> > Hi Paolo,
> > 
> > could you please clarify what needs to be done to get this merged? In
> > particular, are you OK with my comment above or do we need to do
> > something with it (e.g. get back to the 'logarithmic search' from v2)?
> > 
> > In kvm/queue I can see only 'x86/hyper-v: rename ipi_arg_{ex,non_ex}
> > structures' patch from this series applied.
> 
> Hi,
> 
> my plan was to apply only 1/2/5 for now.  I singled out the rename patch
> because that one could be included in 4.19-rc kernels as a cleanup.

Is this supposed to mean you're not happy with the approach taken in
Vitaly's patch?  Can you explain why?  I take my part of guilt for it so
I'd like to know, too.

Speaking of the options we have, the choice depends on the assumptions
we take. (And I guess when you spoke of quadratic complexity you
referred to the algorithm to convert the vp_index mask into the KVM cpu
mask.)

If we can assume that in all relevant cases vp_index coincides with the
cpu index (which I think we can) then Vitaly's approach is the most
efficient.

If, on the opposite, we want to optimize for random mapping between
vp_index and cpu index, then it's probably better instead to iterate
over vcpus and test if their vp_index belongs to the requested mask.

Neither of the above is quadratic.

Dunno if we need to specifically consider intermediate situations.

Anyway, using a heavier vp_index -> cpu index translation looks like
overkill to me.

What do you think?

Thanks,
Roman.


Re: [PATCH 1/2] arm64: dts: allwinner: Olimex A64-OLinuXino: enable eMMC.

2018-09-25 Thread Maxime Ripard
On Fri, Sep 21, 2018 at 11:54:07AM -0300, Rodrigo Exterckötter Tjäder wrote:
> On Fri, Sep 21, 2018 at 11:28 AM Maxime Ripard
>  wrote:
> > Expanding a bit more that commit log would be helpful. What is the
> > eMMC connected to that board? Do all versions have it? Which modes are
> > supposed to be supported, and which one have been tested?
> 
> The terseness of the commit message was already pointed out to me on
> #linux-sunxi. I was waiting for more comments before sending a v2.
> 
> But you do touch on something I also realized: there are actually
> three versions of the A64-OLinuXino. In fact only one of them has the
> WiFi board, which is already enabled in the current device tree.

Sigh, ok..

> Wouldn't it be better to split it into three separate device trees? I
> have made a patch that does that[1], if you think that is a good
> approach I can submit that as a patch and then later submit a patch on
> top of that to enable the eMMC only on the two boards that have it.

We can't really do that, unfortunately. If the device tree name was to
change for a given board, we'd break all the build systems, boot
scripts and distros out there.

Maxime

-- 
Maxime Ripard, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com


signature.asc
Description: PGP signature


Re: [PATCH 4.18 164/235] pinctrl: pinmux: Return selector to the pinctrl driver

2018-09-25 Thread Greg Kroah-Hartman
On Mon, Sep 24, 2018 at 06:07:10PM -0500, Dan Rue wrote:
> On Mon, Sep 24, 2018 at 01:52:30PM +0200, Greg Kroah-Hartman wrote:
> > 4.18-stable review patch.  If anyone has any objections, please let me know.
> 
> I bisected a boot failure on an x15 (arm) board to this commit on
> 4.18.10-rc1. I'm also seeing issues on 4.14 and 4.18 with arm64 boards
> hikey and dragonboard 410c, but I have not investigated them yet (they
> could be unrelated).
> 
> I see there are fixes to this commit that have not been backported.
> Namely:
> 
> 823dd71f58eb ("pinctrl: ingenic: Fix group & function error checking")
> a203728ac6bb ("pinctrl: core: Return selector to the pinctrl driver")
> 
> However, I tried adding those two in but I still see the boot failure on
> x15.

Thanks for letting me know.  I've dropped this patch from the 4.14.y and
4.18.y queues.

I'll push out -rc2 versions in a bit as all of these seem to have had
problems...

thanks,

greg k-h


Re: [PATCH] gpio: mockup: use device properties instead of platform_data

2018-09-25 Thread Mika Westerberg
On Tue, Sep 25, 2018 at 10:53:30AM +0200, Linus Walleij wrote:
> On Mon, Sep 24, 2018 at 9:35 AM Bartosz Golaszewski  wrote:
> > niedz., 23 wrz 2018 o 13:17 Bartosz Golaszewski  napisał(a):
> > >
> > > Some users want to introduce device tree support to the mockup driver.
> > > Let's make it easier by switching to using generic device properties.
> > > The driver stays compatible with previous use cases and after this
> > > conversion there'll be no need to change the way probing of mockup
> > > GPIO chips works.
> > >
> > > Tested with libgpiod test suite.
> > >
> > > Signed-off-by: Bartosz Golaszewski 
> (...)
> > Linus, I just noticed that we either need to drop the check for
> > -ENOENT or add an else that returns on any other error. I'll need to
> > send a v2
> 
> OK business as usual.
> 
> > but I'll let you first tell me if you like the general idea.
> 
> I don't know, I'm a bit ignorant about the idea, scope and
> ambition with device properties.
> 
> In my book this relates to the fwnode concept, but maybe
> I'm wrong?

I think you are right.

> Mika, please fill us in here. The device properties in gpiolib
> came in the patch
> "gpio: Rework of_gpiochip_set_names() to use device property accessors"

I'm not sure what to answer to be honest :) The idea behind that patch
was that we would be able to use the same properties (DT properties,
ACPI _DSD properties, or possibly built-in properties) to set names for
GPIOs.


[PATCH 04/14] thermal/drivers/hisi: Factor out the probe functions

2018-09-25 Thread Daniel Lezcano
The hi6220 and the hi3660 probe functions are doing almost the same
operations, they can share 90% of their code.

Factor out the probe functions by moving the common code in the common
probe function.

Signed-off-by: Daniel Lezcano 
---
 drivers/thermal/hisi_thermal.c | 39 ---
 1 file changed, 12 insertions(+), 27 deletions(-)

diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
index 567fde6..7287818 100644
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c
@@ -391,16 +391,8 @@ static int hi6220_thermal_probe(struct hisi_thermal_data 
*data)
 {
struct platform_device *pdev = data->pdev;
struct device *dev = &pdev->dev;
-   struct resource *res;
int ret;
 
-   res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-   data->regs = devm_ioremap_resource(dev, res);
-   if (IS_ERR(data->regs)) {
-   dev_err(dev, "failed to get io address\n");
-   return PTR_ERR(data->regs);
-   }
-
data->clk = devm_clk_get(dev, "thermal_clk");
if (IS_ERR(data->clk)) {
ret = PTR_ERR(data->clk);
@@ -409,10 +401,6 @@ static int hi6220_thermal_probe(struct hisi_thermal_data 
*data)
return ret;
}
 
-   data->irq = platform_get_irq(pdev, 0);
-   if (data->irq < 0)
-   return data->irq;
-
data->sensor.id = HI6220_DEFAULT_SENSOR;
 
return 0;
@@ -420,21 +408,6 @@ static int hi6220_thermal_probe(struct hisi_thermal_data 
*data)
 
 static int hi3660_thermal_probe(struct hisi_thermal_data *data)
 {
-   struct platform_device *pdev = data->pdev;
-   struct device *dev = &pdev->dev;
-   struct resource *res;
-
-   res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-   data->regs = devm_ioremap_resource(dev, res);
-   if (IS_ERR(data->regs)) {
-   dev_err(dev, "failed to get io address\n");
-   return PTR_ERR(data->regs);
-   }
-
-   data->irq = platform_get_irq(pdev, 0);
-   if (data->irq < 0)
-   return data->irq;
-
data->sensor.id = HI3660_DEFAULT_SENSOR;
 
return 0;
@@ -553,6 +526,7 @@ static int hisi_thermal_probe(struct platform_device *pdev)
 {
struct hisi_thermal_data *data;
struct device *dev = &pdev->dev;
+   struct resource *res;
int ret;
 
data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
@@ -564,6 +538,17 @@ static int hisi_thermal_probe(struct platform_device *pdev)
data->sensor.data = data;
data->ops = of_device_get_match_data(dev);
 
+   res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+   data->regs = devm_ioremap_resource(dev, res);
+   if (IS_ERR(data->regs)) {
+   dev_err(dev, "failed to get io address\n");
+   return PTR_ERR(data->regs);
+   }
+
+   data->irq = platform_get_irq(pdev, 0);
+   if (data->irq < 0)
+   return data->irq;
+
ret = data->ops->probe(data);
if (ret)
return ret;
-- 
2.7.4



[PATCH 02/14] thermal/drivers/hisi: Change the driver to be sensor oriented

2018-09-25 Thread Daniel Lezcano
In order to support multiple sensors, we have to change the code to
deal with sensors and not the hisi thermal structure.

Add a back pointer to the hisi thermal structure (container_of is not a
good option because later we convert the sensor field to a pointer).

Change the function parameters to take a sensor instead of the hisi
thermal 'data' structure.

Signed-off-by: Daniel Lezcano 
---
 drivers/thermal/hisi_thermal.c | 72 --
 1 file changed, 42 insertions(+), 30 deletions(-)

diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
index 9794cfe..1fdda55 100644
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c
@@ -58,27 +58,28 @@
 #define HI6220_DEFAULT_SENSOR  2
 #define HI3660_DEFAULT_SENSOR  1
 
+struct hisi_thermal_data;
+
 struct hisi_thermal_sensor {
+   struct hisi_thermal_data *data;
struct thermal_zone_device *tzd;
uint32_t id;
uint32_t thres_temp;
 };
 
-struct hisi_thermal_data;
-
 struct hisi_thermal_ops {
-   int (*get_temp)(struct hisi_thermal_data *data);
-   int (*enable_sensor)(struct hisi_thermal_data *data);
-   int (*disable_sensor)(struct hisi_thermal_data *data);
-   int (*irq_handler)(struct hisi_thermal_data *data);
+   int (*get_temp)(struct hisi_thermal_sensor *sensor);
+   int (*enable_sensor)(struct hisi_thermal_sensor *sensor);
+   int (*disable_sensor)(struct hisi_thermal_sensor *sensor);
+   int (*irq_handler)(struct hisi_thermal_sensor *sensor);
int (*probe)(struct hisi_thermal_data *data);
 };
 
 struct hisi_thermal_data {
const struct hisi_thermal_ops *ops;
+   struct hisi_thermal_sensor sensor;
struct platform_device *pdev;
struct clk *clk;
-   struct hisi_thermal_sensor sensor;
void __iomem *regs;
int irq;
 };
@@ -273,30 +274,40 @@ static inline void hi6220_thermal_hdak_set(void __iomem 
*addr, int value)
   (value << 4), addr + HI6220_TEMP0_CFG);
 }
 
-static int hi6220_thermal_irq_handler(struct hisi_thermal_data *data)
+static int hi6220_thermal_irq_handler(struct hisi_thermal_sensor *sensor)
 {
+   struct hisi_thermal_data *data = sensor->data;
+
hi6220_thermal_alarm_clear(data->regs, 1);
return 0;
 }
 
-static int hi3660_thermal_irq_handler(struct hisi_thermal_data *data)
+static int hi3660_thermal_irq_handler(struct hisi_thermal_sensor *sensor)
 {
-   hi3660_thermal_alarm_clear(data->regs, data->sensor.id, 1);
+   struct hisi_thermal_data *data = sensor->data;
+
+   hi3660_thermal_alarm_clear(data->regs, sensor->id, 1);
return 0;
 }
 
-static int hi6220_thermal_get_temp(struct hisi_thermal_data *data)
+static int hi6220_thermal_get_temp(struct hisi_thermal_sensor *sensor)
 {
+   struct hisi_thermal_data *data = sensor->data;
+
return hi6220_thermal_get_temperature(data->regs);
 }
 
-static int hi3660_thermal_get_temp(struct hisi_thermal_data *data)
+static int hi3660_thermal_get_temp(struct hisi_thermal_sensor *sensor)
 {
-   return hi3660_thermal_get_temperature(data->regs, data->sensor.id);
+   struct hisi_thermal_data *data = sensor->data;
+
+   return hi3660_thermal_get_temperature(data->regs, sensor->id);
 }
 
-static int hi6220_thermal_disable_sensor(struct hisi_thermal_data *data)
+static int hi6220_thermal_disable_sensor(struct hisi_thermal_sensor *sensor)
 {
+   struct hisi_thermal_data *data = sensor->data;
+
/* disable sensor module */
hi6220_thermal_enable(data->regs, 0);
hi6220_thermal_alarm_enable(data->regs, 0);
@@ -307,16 +318,18 @@ static int hi6220_thermal_disable_sensor(struct 
hisi_thermal_data *data)
return 0;
 }
 
-static int hi3660_thermal_disable_sensor(struct hisi_thermal_data *data)
+static int hi3660_thermal_disable_sensor(struct hisi_thermal_sensor *sensor)
 {
+   struct hisi_thermal_data *data = sensor->data;
+
/* disable sensor module */
-   hi3660_thermal_alarm_enable(data->regs, data->sensor.id, 0);
+   hi3660_thermal_alarm_enable(data->regs, sensor->id, 0);
return 0;
 }
 
-static int hi6220_thermal_enable_sensor(struct hisi_thermal_data *data)
+static int hi6220_thermal_enable_sensor(struct hisi_thermal_sensor *sensor)
 {
-   struct hisi_thermal_sensor *sensor = &data->sensor;
+   struct hisi_thermal_data *data = sensor->data;
int ret;
 
/* enable clock for tsensor */
@@ -352,10 +365,10 @@ static int hi6220_thermal_enable_sensor(struct 
hisi_thermal_data *data)
return 0;
 }
 
-static int hi3660_thermal_enable_sensor(struct hisi_thermal_data *data)
+static int hi3660_thermal_enable_sensor(struct hisi_thermal_sensor *sensor)
 {
unsigned int value;
-   struct hisi_thermal_sensor *sensor = &data->sensor;
+   struct hisi_thermal_data *data = sensor->data;
 
/* disable interrupt */
hi3660_thermal_alarm_enable(data->regs

[PATCH 01/14] thermal/drivers/hisi: Change the platform data pointer to sensor ops

2018-09-25 Thread Daniel Lezcano
Group the temperature sensor specific ops into a single structure and
assign it to hisi thermal data structure.

Change the platform data pointer to reference the specific sensor ops
instead of the probe functions.

Moving those out allows splitting the code to self-encapsulate the
sensor object.

Signed-off-by: Daniel Lezcano 
---
 drivers/thermal/hisi_thermal.c | 60 --
 1 file changed, 34 insertions(+), 26 deletions(-)

diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
index 761d055..9794cfe 100644
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c
@@ -64,11 +64,18 @@ struct hisi_thermal_sensor {
uint32_t thres_temp;
 };
 
-struct hisi_thermal_data {
+struct hisi_thermal_data;
+
+struct hisi_thermal_ops {
int (*get_temp)(struct hisi_thermal_data *data);
int (*enable_sensor)(struct hisi_thermal_data *data);
int (*disable_sensor)(struct hisi_thermal_data *data);
int (*irq_handler)(struct hisi_thermal_data *data);
+   int (*probe)(struct hisi_thermal_data *data);
+};
+
+struct hisi_thermal_data {
+   const struct hisi_thermal_ops *ops;
struct platform_device *pdev;
struct clk *clk;
struct hisi_thermal_sensor sensor;
@@ -374,11 +381,6 @@ static int hi6220_thermal_probe(struct hisi_thermal_data 
*data)
struct resource *res;
int ret;
 
-   data->get_temp = hi6220_thermal_get_temp;
-   data->enable_sensor = hi6220_thermal_enable_sensor;
-   data->disable_sensor = hi6220_thermal_disable_sensor;
-   data->irq_handler = hi6220_thermal_irq_handler;
-
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
data->regs = devm_ioremap_resource(dev, res);
if (IS_ERR(data->regs)) {
@@ -409,11 +411,6 @@ static int hi3660_thermal_probe(struct hisi_thermal_data 
*data)
struct device *dev = &pdev->dev;
struct resource *res;
 
-   data->get_temp = hi3660_thermal_get_temp;
-   data->enable_sensor = hi3660_thermal_enable_sensor;
-   data->disable_sensor = hi3660_thermal_disable_sensor;
-   data->irq_handler = hi3660_thermal_irq_handler;
-
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
data->regs = devm_ioremap_resource(dev, res);
if (IS_ERR(data->regs)) {
@@ -435,7 +432,7 @@ static int hisi_thermal_get_temp(void *__data, int *temp)
struct hisi_thermal_data *data = __data;
struct hisi_thermal_sensor *sensor = &data->sensor;
 
-   *temp = data->get_temp(data);
+   *temp = data->ops->get_temp(data);
 
dev_dbg(&data->pdev->dev, "id=%d, temp=%d, thres=%d\n",
sensor->id, *temp, sensor->thres_temp);
@@ -453,7 +450,7 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, 
void *dev)
struct hisi_thermal_sensor *sensor = &data->sensor;
int temp = 0;
 
-   data->irq_handler(data);
+   data->ops->irq_handler(data);
 
hisi_thermal_get_temp(data, &temp);
 
@@ -502,14 +499,30 @@ static int hisi_thermal_register_sensor(struct 
platform_device *pdev,
return 0;
 }
 
+static const struct hisi_thermal_ops hi6220_ops = {
+   .get_temp   = hi6220_thermal_get_temp,
+   .enable_sensor  = hi6220_thermal_enable_sensor,
+   .disable_sensor = hi6220_thermal_disable_sensor,
+   .irq_handler= hi6220_thermal_irq_handler,
+   .probe  = hi6220_thermal_probe,
+};
+
+static const struct hisi_thermal_ops hi3660_ops = {
+   .get_temp   = hi3660_thermal_get_temp,
+   .enable_sensor  = hi3660_thermal_enable_sensor,
+   .disable_sensor = hi3660_thermal_disable_sensor,
+   .irq_handler= hi3660_thermal_irq_handler,
+   .probe  = hi3660_thermal_probe,
+};
+
 static const struct of_device_id of_hisi_thermal_match[] = {
{
.compatible = "hisilicon,tsensor",
-   .data = hi6220_thermal_probe
+   .data = &hi6220_ops,
},
{
.compatible = "hisilicon,hi3660-tsensor",
-   .data = hi3660_thermal_probe
+   .data = &hi3660_ops,
},
{ /* end */ }
 };
@@ -527,7 +540,6 @@ static void hisi_thermal_toggle_sensor(struct 
hisi_thermal_sensor *sensor,
 static int hisi_thermal_probe(struct platform_device *pdev)
 {
struct hisi_thermal_data *data;
-   int (*platform_probe)(struct hisi_thermal_data *);
struct device *dev = &pdev->dev;
int ret;
 
@@ -538,13 +550,9 @@ static int hisi_thermal_probe(struct platform_device *pdev)
data->pdev = pdev;
platform_set_drvdata(pdev, data);
 
-   platform_probe = of_device_get_match_data(dev);
-   if (!platform_probe) {
-   dev_err(dev, "failed to get probe func\n");
-   return -EINVAL;
-   }
+   data->ops = of_device_get_match_data(dev);
 
-   ret = platform_probe(data);
+   ret = data->ops->probe(data);
if 

[PATCH 06/14] thermal/drivers/hisi: Add multiple sensors support

2018-09-25 Thread Daniel Lezcano
Change the code as it is dealing with several sensors.

For git-bisect compatibility (compilation and booting), assume the DT
is not yet changed and we have a single interrupt.

Next changes will support multiple interrupts sorted by their name.

Signed-off-by: Daniel Lezcano 
---
 drivers/thermal/hisi_thermal.c | 79 +-
 1 file changed, 47 insertions(+), 32 deletions(-)

diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
index 87b82fb..a5756f6 100644
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c
@@ -448,8 +448,8 @@ static const struct thermal_zone_of_device_ops 
hisi_of_thermal_ops = {
 
 static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev)
 {
-   struct hisi_thermal_data *data = dev;
-   struct hisi_thermal_sensor *sensor = &data->sensor[0];
+   struct hisi_thermal_sensor *sensor = dev;
+   struct hisi_thermal_data *data = sensor->data;
int temp = 0;
 
data->ops->irq_handler(sensor);
@@ -457,15 +457,17 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, 
void *dev)
hisi_thermal_get_temp(sensor, &temp);
 
if (temp >= sensor->thres_temp) {
-   dev_crit(&data->pdev->dev, "THERMAL ALARM: %d > %d\n",
-temp, sensor->thres_temp);
+   dev_crit(&data->pdev->dev,
+"sensor <%d> THERMAL ALARM: %d > %d\n",
+sensor->id, temp, sensor->thres_temp);
 
-   thermal_zone_device_update(data->sensor[0].tzd,
+   thermal_zone_device_update(sensor->tzd,
   THERMAL_EVENT_UNSPECIFIED);
 
} else {
-   dev_crit(&data->pdev->dev, "THERMAL ALARM stopped: %d < %d\n",
-temp, sensor->thres_temp);
+   dev_crit(&data->pdev->dev,
+"sensor <%d> THERMAL ALARM stopped: %d < %d\n",
+sensor->id, temp, sensor->thres_temp);
}
 
return IRQ_HANDLED;
@@ -543,7 +545,7 @@ static int hisi_thermal_probe(struct platform_device *pdev)
struct hisi_thermal_data *data;
struct device *dev = &pdev->dev;
struct resource *res;
-   int ret;
+   int i, ret;
 
data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
if (!data)
@@ -560,37 +562,41 @@ static int hisi_thermal_probe(struct platform_device 
*pdev)
return PTR_ERR(data->regs);
}
 
-   data->irq = platform_get_irq(pdev, 0);
-   if (data->irq < 0)
-   return data->irq;
-
ret = data->ops->probe(data);
if (ret)
return ret;
 
-   ret = hisi_thermal_register_sensor(pdev, &data->sensor[0]);
-   if (ret) {
-   dev_err(dev, "failed to register thermal sensor: %d\n", ret);
-   return ret;
-   }
+   for (i = 0; i < data->nr_sensors; i++) {
+   struct hisi_thermal_sensor *sensor = &data->sensor[i];
 
-   ret = data->ops->enable_sensor(&data->sensor[0]);
-   if (ret) {
-   dev_err(dev, "Failed to setup the sensor: %d\n", ret);
-   return ret;
-   }
+   ret = hisi_thermal_register_sensor(pdev, sensor);
+   if (ret) {
+   dev_err(dev, "failed to register thermal sensor: %d\n",
+   ret);
+   return ret;
+   }
+
+   data->irq = platform_get_irq(pdev, 0);
+   if (data->irq < 0)
+   return data->irq;
 
-   if (data->irq) {
ret = devm_request_threaded_irq(dev, data->irq, NULL,
-   hisi_thermal_alarm_irq_thread,
-   IRQF_ONESHOT, "hisi_thermal", data);
+   hisi_thermal_alarm_irq_thread,
+   IRQF_ONESHOT, "hisi_thermal",
+   sensor);
if (ret < 0) {
dev_err(dev, "failed to request alarm irq: %d\n", ret);
return ret;
}
-   }
 
-   hisi_thermal_toggle_sensor(&data->sensor[0], true);
+   ret = data->ops->enable_sensor(sensor);
+   if (ret) {
+   dev_err(dev, "Failed to setup the sensor: %d\n", ret);
+   return ret;
+   }
+
+   hisi_thermal_toggle_sensor(sensor, true);
+   }
 
return 0;
 }
@@ -598,11 +604,14 @@ static int hisi_thermal_probe(struct platform_device 
*pdev)
 static int hisi_thermal_remove(struct platform_device *pdev)
 {
struct hisi_thermal_data *data = platform_get_drvdata(pdev);
-   struct hisi_thermal_sensor *sensor = &data->sensor[0];
+   int i;
 
-   hisi_thermal_toggle_sensor(sensor, false);
+   for (i = 0; i < data-

[PATCH 05/14] thermal/drivers/hisi: Prepare to support multiple sensors

2018-09-25 Thread Daniel Lezcano
Convert the 'sensor' field to a pointer and propagate the change in
the file. Having a pointer gives us the opportunity to define
multiple sensors.

Signed-off-by: Daniel Lezcano 
---
 drivers/thermal/hisi_thermal.c | 41 -
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
index 7287818..87b82fb 100644
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c
@@ -77,10 +77,11 @@ struct hisi_thermal_ops {
 
 struct hisi_thermal_data {
const struct hisi_thermal_ops *ops;
-   struct hisi_thermal_sensor sensor;
+   struct hisi_thermal_sensor *sensor;
struct platform_device *pdev;
struct clk *clk;
void __iomem *regs;
+   int nr_sensors;
int irq;
 };
 
@@ -401,14 +402,29 @@ static int hi6220_thermal_probe(struct hisi_thermal_data 
*data)
return ret;
}
 
-   data->sensor.id = HI6220_DEFAULT_SENSOR;
+   data->sensor = devm_kzalloc(dev, sizeof(*data->sensor), GFP_KERNEL);
+   if (!data->sensor)
+   return -ENOMEM;
+
+   data->sensor[0].id = HI6220_DEFAULT_SENSOR;
+   data->sensor[0].data = data;
+   data->nr_sensors = 1;
 
return 0;
 }
 
 static int hi3660_thermal_probe(struct hisi_thermal_data *data)
 {
-   data->sensor.id = HI3660_DEFAULT_SENSOR;
+   struct platform_device *pdev = data->pdev;
+   struct device *dev = &pdev->dev;
+
+   data->sensor = devm_kzalloc(dev, sizeof(*data->sensor), GFP_KERNEL);
+   if (!data->sensor)
+   return -ENOMEM;
+
+   data->sensor[0].id = HI3660_DEFAULT_SENSOR;
+   data->sensor[0].data = data;
+   data->nr_sensors = 1;
 
return 0;
 }
@@ -433,7 +449,7 @@ static const struct thermal_zone_of_device_ops 
hisi_of_thermal_ops = {
 static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev)
 {
struct hisi_thermal_data *data = dev;
-   struct hisi_thermal_sensor *sensor = &data->sensor;
+   struct hisi_thermal_sensor *sensor = &data->sensor[0];
int temp = 0;
 
data->ops->irq_handler(sensor);
@@ -444,7 +460,7 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, 
void *dev)
dev_crit(&data->pdev->dev, "THERMAL ALARM: %d > %d\n",
 temp, sensor->thres_temp);
 
-   thermal_zone_device_update(data->sensor.tzd,
+   thermal_zone_device_update(data->sensor[0].tzd,
   THERMAL_EVENT_UNSPECIFIED);
 
} else {
@@ -535,7 +551,6 @@ static int hisi_thermal_probe(struct platform_device *pdev)
 
data->pdev = pdev;
platform_set_drvdata(pdev, data);
-   data->sensor.data = data;
data->ops = of_device_get_match_data(dev);
 
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -553,13 +568,13 @@ static int hisi_thermal_probe(struct platform_device 
*pdev)
if (ret)
return ret;
 
-   ret = hisi_thermal_register_sensor(pdev, &data->sensor);
+   ret = hisi_thermal_register_sensor(pdev, &data->sensor[0]);
if (ret) {
dev_err(dev, "failed to register thermal sensor: %d\n", ret);
return ret;
}
 
-   ret = data->ops->enable_sensor(&data->sensor);
+   ret = data->ops->enable_sensor(&data->sensor[0]);
if (ret) {
dev_err(dev, "Failed to setup the sensor: %d\n", ret);
return ret;
@@ -575,7 +590,7 @@ static int hisi_thermal_probe(struct platform_device *pdev)
}
}
 
-   hisi_thermal_toggle_sensor(&data->sensor, true);
+   hisi_thermal_toggle_sensor(&data->sensor[0], true);
 
return 0;
 }
@@ -583,11 +598,11 @@ static int hisi_thermal_probe(struct platform_device 
*pdev)
 static int hisi_thermal_remove(struct platform_device *pdev)
 {
struct hisi_thermal_data *data = platform_get_drvdata(pdev);
-   struct hisi_thermal_sensor *sensor = &data->sensor;
+   struct hisi_thermal_sensor *sensor = &data->sensor[0];
 
hisi_thermal_toggle_sensor(sensor, false);
 
-   data->ops->disable_sensor(&data->sensor);
+   data->ops->disable_sensor(sensor);
 
return 0;
 }
@@ -597,7 +612,7 @@ static int hisi_thermal_suspend(struct device *dev)
 {
struct hisi_thermal_data *data = dev_get_drvdata(dev);
 
-   data->ops->disable_sensor(&data->sensor);
+   data->ops->disable_sensor(&data->sensor[0]);
 
return 0;
 }
@@ -606,7 +621,7 @@ static int hisi_thermal_resume(struct device *dev)
 {
struct hisi_thermal_data *data = dev_get_drvdata(dev);
 
-   return data->ops->enable_sensor(&data->sensor);
+   return data->ops->enable_sensor(&data->sensor[0]);
 }
 #endif
 
-- 
2.7.4



[PATCH 00/14] thermal/drivers/hi3660: Dual cluster sensors support

2018-09-25 Thread Daniel Lezcano
This patch series provides the changes to support the dual clusters sensor on
the hikey960 board.

Most of the patches set the scene for the addition of other sensors which comes
at the end of the series.

Daniel Lezcano (14):
  thermal/drivers/hisi: Change the platform data pointer to sensor ops
  thermal/drivers/hisi: Change the driver to be sensor oriented
  thermal/drivers/hisi: Set the thermal zone private data to the sensor
pointer
  thermal/drivers/hisi: Factor out the probe functions
  thermal/drivers/hisi: Prepare to support multiple sensors
  thermal/drivers/hisi: Add multiple sensors support
  thermal/drivers/hisi: Replace macro name with relevant sensor location
  ARM64: dts: hisilicon: Add tsensor interrupt name
  thermal/drivers/hisi: Use platform_get_irq_byname
  ARM64: dts: hisilicon: Add interrupt names for the tsensors
  thermal/drivers/hisi: Remove pointless irq field
  thermal/drivers/hisi: Add more sensors channel
  ARM64: dts: hisilicon: Add dual clusters thermal zones for hi3660
  thermal/drivers/hisi: Add the dual clusters sensors for hi3660

 .../bindings/thermal/hisilicon-thermal.txt |   9 +-
 arch/arm64/boot/dts/hisilicon/hi3660.dtsi  |  60 +++--
 arch/arm64/boot/dts/hisilicon/hi6220.dtsi  |   1 +
 drivers/thermal/hisi_thermal.c | 249 -
 4 files changed, 204 insertions(+), 115 deletions(-)

-- 
2.7.4



[PATCH 03/14] thermal/drivers/hisi: Set the thermal zone private data to the sensor pointer

2018-09-25 Thread Daniel Lezcano
Store the sensor pointer in the thermal zone private data and use it
in the callback functions. That allows us to continue the conversion to
sensor oriented code where the pointers are the sensors.

Signed-off-by: Daniel Lezcano 
---
 drivers/thermal/hisi_thermal.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
index 1fdda55..567fde6 100644
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c
@@ -442,8 +442,8 @@ static int hi3660_thermal_probe(struct hisi_thermal_data 
*data)
 
 static int hisi_thermal_get_temp(void *__data, int *temp)
 {
-   struct hisi_thermal_data *data = __data;
-   struct hisi_thermal_sensor *sensor = &data->sensor;
+   struct hisi_thermal_sensor *sensor = __data;
+   struct hisi_thermal_data *data = sensor->data;
 
*temp = data->ops->get_temp(sensor);
 
@@ -465,7 +465,7 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, 
void *dev)
 
data->ops->irq_handler(sensor);
 
-   hisi_thermal_get_temp(data, &temp);
+   hisi_thermal_get_temp(sensor, &temp);
 
if (temp >= sensor->thres_temp) {
dev_crit(&data->pdev->dev, "THERMAL ALARM: %d > %d\n",
@@ -486,11 +486,10 @@ static int hisi_thermal_register_sensor(struct 
platform_device *pdev,
struct hisi_thermal_sensor *sensor)
 {
int ret, i;
-   struct hisi_thermal_data *data = sensor->data;
const struct thermal_trip *trip;
 
sensor->tzd = devm_thermal_zone_of_sensor_register(&pdev->dev,
-  sensor->id, data,
+  sensor->id, sensor,
   
&hisi_of_thermal_ops);
if (IS_ERR(sensor->tzd)) {
ret = PTR_ERR(sensor->tzd);
-- 
2.7.4



[PATCH 12/14] thermal/drivers/hisi: Add more sensors channel

2018-09-25 Thread Daniel Lezcano
Add the sensor channels id for the little, g3d and modem.

Signed-off-by: Daniel Lezcano 
---
 drivers/thermal/hisi_thermal.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
index 87d8a13..ba89cb9 100644
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c
@@ -56,7 +56,12 @@
 #define HI3660_TEMP_LAG(4000)
 
 #define HI6220_CLUSTER0_SENSOR 2
+#define HI6220_CLUSTER1_SENSOR 1
+
+#define HI3660_LITTLE_SENSOR   0
 #define HI3660_BIG_SENSOR  1
+#define HI3660_G3D_SENSOR  2
+#define HI3660_MODEM_SENSOR3
 
 struct hisi_thermal_data;
 
-- 
2.7.4



[PATCH 10/14] ARM64: dts: hisilicon: Add interrupt names for the tsensors

2018-09-25 Thread Daniel Lezcano
Add the missing interrupts for the temperature sensors as well as
their names.

Signed-off-by: Daniel Lezcano 
---
 Documentation/devicetree/bindings/thermal/hisilicon-thermal.txt | 8 ++--
 arch/arm64/boot/dts/hisilicon/hi3660.dtsi   | 8 ++--
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/Documentation/devicetree/bindings/thermal/hisilicon-thermal.txt 
b/Documentation/devicetree/bindings/thermal/hisilicon-thermal.txt
index 3edfae3..4cb8add 100644
--- a/Documentation/devicetree/bindings/thermal/hisilicon-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/hisilicon-thermal.txt
@@ -29,7 +29,11 @@ for Hi3660:
tsensor: tsensor@fff3 {
compatible = "hisilicon,hi3660-tsensor";
reg = <0x0 0xfff3 0x0 0x1000>;
-   interrupts = ;
-   interrupt-names = "tsensor_a73";
+   interrupts = ,
+,
+,
+;
+   interrupt-names = "tsensor_a73", "tsensor_a53",
+ "tsensor_g3d", "tsensor_modem";
#thermal-sensor-cells = <1>;
};
diff --git a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi 
b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
index bf8a479..dd398cb 100644
--- a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
+++ b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
@@ -1080,8 +1080,12 @@
tsensor: tsensor@fff3 {
compatible = "hisilicon,hi3660-tsensor";
reg = <0x0 0xfff3 0x0 0x1000>;
-   interrupts = ;
-   interrupt-names = "tsensor_a73";
+   interrupts = ,
+,
+,
+;
+   interrupt-names = "tsensor_a73", "tsensor_a53",
+ "tsensor_g3d", "tsensor_modem";
#thermal-sensor-cells = <1>;
};
 
-- 
2.7.4



[PATCH 07/14] thermal/drivers/hisi: Replace macro name with relevant sensor location

2018-09-25 Thread Daniel Lezcano
Change the macro name in order to give a better indication of the
sensor location.

Signed-off-by: Daniel Lezcano 
---
 drivers/thermal/hisi_thermal.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
index a5756f6..a542cb3 100644
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c
@@ -55,8 +55,8 @@
 #define HI3660_TEMP_STEP   (205)
 #define HI3660_TEMP_LAG(4000)
 
-#define HI6220_DEFAULT_SENSOR  2
-#define HI3660_DEFAULT_SENSOR  1
+#define HI6220_CLUSTER0_SENSOR 2
+#define HI3660_BIG_SENSOR  1
 
 struct hisi_thermal_data;
 
@@ -406,7 +406,7 @@ static int hi6220_thermal_probe(struct hisi_thermal_data 
*data)
if (!data->sensor)
return -ENOMEM;
 
-   data->sensor[0].id = HI6220_DEFAULT_SENSOR;
+   data->sensor[0].id = HI6220_CLUSTER0_SENSOR;
data->sensor[0].data = data;
data->nr_sensors = 1;
 
@@ -422,7 +422,7 @@ static int hi3660_thermal_probe(struct hisi_thermal_data 
*data)
if (!data->sensor)
return -ENOMEM;
 
-   data->sensor[0].id = HI3660_DEFAULT_SENSOR;
+   data->sensor[0].id = HI3660_BIG_SENSOR;
data->sensor[0].data = data;
data->nr_sensors = 1;
 
-- 
2.7.4



[PATCH 14/14] thermal/drivers/hisi: Add the dual clusters sensors for hi3660

2018-09-25 Thread Daniel Lezcano
The code is ready to support multiple sensors on the hi3660. The DT
defines a thermal zone per cluster.

Add the little cluster sensor and let it bind with the thermal zone.

Signed-off-by: Daniel Lezcano 
---
 drivers/thermal/hisi_thermal.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
index ba89cb9..c4111a9 100644
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c
@@ -424,14 +424,20 @@ static int hi3660_thermal_probe(struct hisi_thermal_data 
*data)
struct platform_device *pdev = data->pdev;
struct device *dev = &pdev->dev;
 
-   data->sensor = devm_kzalloc(dev, sizeof(*data->sensor), GFP_KERNEL);
+   data->nr_sensors = 2;
+
+   data->sensor = devm_kzalloc(dev, sizeof(*data->sensor) *
+   data->nr_sensors, GFP_KERNEL);
if (!data->sensor)
return -ENOMEM;
 
data->sensor[0].id = HI3660_BIG_SENSOR;
data->sensor[0].irq_name = "tsensor_a73";
data->sensor[0].data = data;
-   data->nr_sensors = 1;
+
+   data->sensor[1].id = HI3660_LITTLE_SENSOR;
+   data->sensor[1].irq_name = "tsensor_a53";
+   data->sensor[1].data = data;
 
return 0;
 }
@@ -443,8 +449,8 @@ static int hisi_thermal_get_temp(void *__data, int *temp)
 
*temp = data->ops->get_temp(sensor);
 
-   dev_dbg(&data->pdev->dev, "id=%d, temp=%d, thres=%d\n",
-   sensor->id, *temp, sensor->thres_temp);
+   dev_dbg(&data->pdev->dev, "tzd=%p, id=%d, temp=%d, thres=%d\n",
+   sensor->tzd, sensor->id, *temp, sensor->thres_temp);
 
return 0;
 }
-- 
2.7.4



[PATCH 13/14] ARM64: dts: hisilicon: Add dual clusters thermal zones for hi3660

2018-09-25 Thread Daniel Lezcano
Add a thermal zone for the little cluster, so we can handle two
sensors, each managing one cluster on the SoC.

Signed-off-by: Daniel Lezcano 
---
 arch/arm64/boot/dts/hisilicon/hi3660.dtsi | 97 ---
 1 file changed, 63 insertions(+), 34 deletions(-)

diff --git a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi 
b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
index dd398cb..6df7d9f 100644
--- a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
+++ b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
@@ -1090,42 +1090,71 @@
};
 
 thermal-zones {
+   tz_a53: tz_a53 {
+   polling-delay = <1000>;
+   polling-delay-passive = <100>;
+   sustainable-power = <4500>;
+
+   /* sensor ID */
+   thermal-sensors = <&tsensor 0>;
+
+   trips {
+   a53_temp_threshold: trip-point@0 {
+   temperature = <65000>;
+   hysteresis = <1000>;
+   type = "passive";
+   };
+
+   a53_temp_target: trip-point@1 {
+   temperature = <75000>;
+   hysteresis = <1000>;
+   type = "passive";
+   };
+   };
 
-cls0: cls0 {
-polling-delay = <1000>;
-polling-delay-passive = <100>;
-sustainable-power = <4500>;
-
-/* sensor ID */
-thermal-sensors = <&tsensor 1>;
-
-trips {
-threshold: trip-point@0 {
-temperature = <65000>;
-hysteresis = <1000>;
-type = "passive";
-};
-
-target: trip-point@1 {
-temperature = <75000>;
-hysteresis = <1000>;
-type = "passive";
-};
-};
-
-cooling-maps {
+   cooling-maps {
map0 {
-trip = <&target>;
-contribution = <1024>;
-cooling-device = <&cpu0 
THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
-};
-   map1 {
-trip = <&target>;
-contribution = <512>;
-cooling-device = <&cpu4 
THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
-};
-};
-};
+   trip = <&a53_temp_target>;
+   contribution = <512>;
+   cooling-device = <&cpu0
+   THERMAL_NO_LIMIT
+   THERMAL_NO_LIMIT>;
+   };
+   };
+   };
+
+   tz_a73: tz_a73 {
+   polling-delay = <1000>;
+   polling-delay-passive = <100>;
+   sustainable-power = <4500>;
+
+   /* sensor ID */
+   thermal-sensors = <&tsensor 1>;
+
+   trips {
+   a73_temp_threshold: trip-point@0 {
+   temperature = <65000>;
+   hysteresis = <1000>;
+   type = "passive";
+   };
+
+   a73_temp_target: trip-point@1 {
+   temperature = <75000>;
+   hysteresis = <1000>;
+   type = "passive";
+

[PATCH 11/14] thermal/drivers/hisi: Remove pointless irq field

2018-09-25 Thread Daniel Lezcano
The irq field in the data structure is pointless as the scope of its
usage is just to request the interrupt. It can be replaced by a local
variable.

Use the 'ret' variable to get the interrupt number.

Signed-off-by: Daniel Lezcano 
---
 drivers/thermal/hisi_thermal.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
index 941c2c4..87d8a13 100644
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c
@@ -83,7 +83,6 @@ struct hisi_thermal_data {
struct clk *clk;
void __iomem *regs;
int nr_sensors;
-   int irq;
 };
 
 /*
@@ -579,16 +578,16 @@ static int hisi_thermal_probe(struct platform_device 
*pdev)
return ret;
}
 
-   data->irq = platform_get_irq_byname(pdev, sensor->irq_name);
-   if (data->irq < 0)
-   return data->irq;
+   ret = platform_get_irq_byname(pdev, sensor->irq_name);
+   if (ret < 0)
+   return ret;
 
-   ret = devm_request_threaded_irq(dev, data->irq, NULL,
+   ret = devm_request_threaded_irq(dev, ret, NULL,
hisi_thermal_alarm_irq_thread,
IRQF_ONESHOT, sensor->irq_name,
sensor);
if (ret < 0) {
-   dev_err(dev, "failed to request alarm irq: %d\n", ret);
+   dev_err(dev, "Failed to request alarm irq: %d\n", ret);
return ret;
}
 
-- 
2.7.4



[PATCH 08/14] ARM64: dts: hisilicon: Add tsensor interrupt name

2018-09-25 Thread Daniel Lezcano
Add the interrupt names for the sensors, so the code can rely on them
instead of dealing with indexes, which are error-prone.

The names come from the HiSilicon documentation found on the internet.

Signed-off-by: Daniel Lezcano 
---
 .../bindings/thermal/hisilicon-thermal.txt |  3 ++
 arch/arm64/boot/dts/hisilicon/hi3660.dtsi  | 63 +++---
 arch/arm64/boot/dts/hisilicon/hi6220.dtsi  |  1 +
 3 files changed, 36 insertions(+), 31 deletions(-)

diff --git a/Documentation/devicetree/bindings/thermal/hisilicon-thermal.txt 
b/Documentation/devicetree/bindings/thermal/hisilicon-thermal.txt
index cef716a..3edfae3 100644
--- a/Documentation/devicetree/bindings/thermal/hisilicon-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/hisilicon-thermal.txt
@@ -7,6 +7,7 @@
   region.
 - interrupt: The interrupt number to the cpu. Defines the interrupt used
   by /SOCTHERM/tsensor.
+- interrupt-names: The interrupt names for the different sensors
 - clock-names: Input clock name, should be 'thermal_clk'.
 - clocks: phandles for clock specified in "clock-names" property.
 - #thermal-sensor-cells: Should be 1. See ./thermal.txt for a description.
@@ -18,6 +19,7 @@ for Hi6220:
compatible = "hisilicon,tsensor";
reg = <0x0 0xf7030700 0x0 0x1000>;
interrupts = <0 7 0x4>;
+   interrupt-names = "tsensor_intr";
clocks = <&sys_ctrl HI6220_TSENSOR_CLK>;
clock-names = "thermal_clk";
#thermal-sensor-cells = <1>;
@@ -28,5 +30,6 @@ for Hi3660:
compatible = "hisilicon,hi3660-tsensor";
reg = <0x0 0xfff3 0x0 0x1000>;
interrupts = ;
+   interrupt-names = "tsensor_a73";
#thermal-sensor-cells = <1>;
};
diff --git a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi 
b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
index f432b0a..bf8a479 100644
--- a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
+++ b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
@@ -1081,46 +1081,47 @@
compatible = "hisilicon,hi3660-tsensor";
reg = <0x0 0xfff3 0x0 0x1000>;
interrupts = ;
+   interrupt-names = "tsensor_a73";
#thermal-sensor-cells = <1>;
};
 
-   thermal-zones {
+thermal-zones {
 
-   cls0: cls0 {
-   polling-delay = <1000>;
-   polling-delay-passive = <100>;
-   sustainable-power = <4500>;
+cls0: cls0 {
+polling-delay = <1000>;
+polling-delay-passive = <100>;
+sustainable-power = <4500>;
 
-   /* sensor ID */
-   thermal-sensors = <&tsensor 1>;
+/* sensor ID */
+thermal-sensors = <&tsensor 1>;
 
-   trips {
-   threshold: trip-point@0 {
-   temperature = <65000>;
-   hysteresis = <1000>;
-   type = "passive";
-   };
+trips {
+threshold: trip-point@0 {
+temperature = <65000>;
+hysteresis = <1000>;
+type = "passive";
+};
 
-   target: trip-point@1 {
-   temperature = <75000>;
-   hysteresis = <1000>;
-   type = "passive";
-   };
-   };
+target: trip-point@1 {
+temperature = <75000>;
+hysteresis = <1000>;
+type = "passive";
+};
+};
 
-   cooling-maps {
+cooling-maps {
map0 {
-   trip = <&target>;
-   contribution = <1024>;
-   cooling-device = <&cpu0 
THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
-   };
+  

[PATCH 09/14] thermal/drivers/hisi: Use platform_get_irq_byname

2018-09-25 Thread Daniel Lezcano
As we now have the interrupt names defined, replace platform_get_irq() with
platform_get_irq_byname(), so there is no confusion when getting
the interrupt for a given sensor id.

Signed-off-by: Daniel Lezcano 
---
 drivers/thermal/hisi_thermal.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
index a542cb3..941c2c4 100644
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c
@@ -63,6 +63,7 @@ struct hisi_thermal_data;
 struct hisi_thermal_sensor {
struct hisi_thermal_data *data;
struct thermal_zone_device *tzd;
+   const char *irq_name;
uint32_t id;
uint32_t thres_temp;
 };
@@ -407,6 +408,7 @@ static int hi6220_thermal_probe(struct hisi_thermal_data 
*data)
return -ENOMEM;
 
data->sensor[0].id = HI6220_CLUSTER0_SENSOR;
+   data->sensor[0].irq_name = "tsensor_intr";
data->sensor[0].data = data;
data->nr_sensors = 1;
 
@@ -423,6 +425,7 @@ static int hi3660_thermal_probe(struct hisi_thermal_data 
*data)
return -ENOMEM;
 
data->sensor[0].id = HI3660_BIG_SENSOR;
+   data->sensor[0].irq_name = "tsensor_a73";
data->sensor[0].data = data;
data->nr_sensors = 1;
 
@@ -576,13 +579,13 @@ static int hisi_thermal_probe(struct platform_device 
*pdev)
return ret;
}
 
-   data->irq = platform_get_irq(pdev, 0);
+   data->irq = platform_get_irq_byname(pdev, sensor->irq_name);
if (data->irq < 0)
return data->irq;
 
ret = devm_request_threaded_irq(dev, data->irq, NULL,
hisi_thermal_alarm_irq_thread,
-   IRQF_ONESHOT, "hisi_thermal",
+   IRQF_ONESHOT, sensor->irq_name,
sensor);
if (ret < 0) {
dev_err(dev, "failed to request alarm irq: %d\n", ret);
-- 
2.7.4



Re: [PATCH 4.18 000/235] 4.18.10-stable review

2018-09-25 Thread Greg Kroah-Hartman
On Mon, Sep 24, 2018 at 01:49:46PM +0200, Greg Kroah-Hartman wrote:
> This is the start of the stable review cycle for the 4.18.10 release.
> There are 235 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
> 
> Responses should be made by Wed Sep 26 11:30:01 UTC 2018.
> Anything received after that time might be too late.
> 
> The whole patch series can be found in one patch at:
>   
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.18.10-rc1.gz
> or in the git tree and branch at:
>   
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.18.y
> and the diffstat can be found below.

-rc2 is out to resolve some reported problems:

https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.18.10-rc2.gz



Re: [PATCH 4.14 000/173] 4.14.72-stable review

2018-09-25 Thread Greg Kroah-Hartman
On Mon, Sep 24, 2018 at 01:50:34PM +0200, Greg Kroah-Hartman wrote:
> This is the start of the stable review cycle for the 4.14.72 release.
> There are 173 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
> 
> Responses should be made by Wed Sep 26 11:30:10 UTC 2018.
> Anything received after that time might be too late.
> 
> The whole patch series can be found in one patch at:
>   
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.14.72-rc1.gz
> or in the git tree and branch at:
>   
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.14.y
> and the diffstat can be found below.

-rc2 is out to resolve some reported problems:

https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.14.72-rc2.gz



Re: [PATCH 4.9 000/111] 4.9.129-stable review

2018-09-25 Thread Greg Kroah-Hartman
On Mon, Sep 24, 2018 at 01:51:27PM +0200, Greg Kroah-Hartman wrote:
> This is the start of the stable review cycle for the 4.9.129 release.
> There are 111 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
> 
> Responses should be made by Wed Sep 26 11:30:16 UTC 2018.
> Anything received after that time might be too late.
> 
> The whole patch series can be found in one patch at:
>   
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.9.129-rc1.gz
> or in the git tree and branch at:
>   
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.9.y
> and the diffstat can be found below.

-rc2 is out to resolve some reported problems:

https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.9.129-rc2.gz




Re: [PATCH 4.4 00/70] 4.4.158-stable review

2018-09-25 Thread Greg Kroah-Hartman
On Mon, Sep 24, 2018 at 01:51:59PM +0200, Greg Kroah-Hartman wrote:
> This is the start of the stable review cycle for the 4.4.158 release.
> There are 70 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
> 
> Responses should be made by Wed Sep 26 11:30:25 UTC 2018.
> Anything received after that time might be too late.
> 
> The whole patch series can be found in one patch at:
>   
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.4.158-rc1.gz
> or in the git tree and branch at:
>   
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.4.y
> and the diffstat can be found below.

-rc2 is out to resolve some reported problems:

https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.4.158-rc2.gz




Re: [PATCH 3/3] fuse: Use hash table to link processing request

2018-09-25 Thread Miklos Szeredi
On Tue, Sep 11, 2018 at 12:12 PM, Kirill Tkhai  wrote:
> We noticed the performance bottle neck in FUSE running our
> Virtuozzo storage over rdma. On some types of workload
> we observe 20% of times pent in request_find() in profiler.
> This function is iterating over long requests list, and it
> scales bad.
>
> The patch introduces hash table to reduce the number
> of iterations, we do in this function. Hash generating
> algorithm is taken from hash_add() function, while
> 512 lines table is used to store pending requests.
> This fixes problem and improves the performance.

Pushed to fuse.git#for-next with a number of small changes.   E.g. I
reduced the number of cache lines to 256 to make the hashtable size just
4k.   Was there a scientific reason for choosing 512 as the optimal
number of cache lines?

Thanks,
Miklos


Re: [PATCH] PCI: remove unnecessary check of device_type == pci

2018-09-25 Thread Lorenzo Pieralisi
On Wed, Aug 29, 2018 at 01:34:40PM -0500, Rob Herring wrote:
> PCI host drivers have already matched on compatible strings, so checking
> device_type is redundant. Also, device_type is considered deprecated for
> FDT though we've still been requiring it for PCI hosts as it is useful
> for finding PCI buses.
> 
> Cc: Will Deacon 
> Cc: Lorenzo Pieralisi 
> Cc: Bjorn Helgaas 
> Cc: Alan Douglas 
> Cc: Subrahmanya Lingappa 
> Cc: Michal Simek 
> Cc: linux-...@vger.kernel.org
> Cc: linux-arm-ker...@lists.infradead.org
> Signed-off-by: Rob Herring 
> ---
>  drivers/pci/controller/pci-host-common.c   | 8 
>  drivers/pci/controller/pcie-cadence-host.c | 7 ---
>  drivers/pci/controller/pcie-mobiveil.c | 7 ---
>  drivers/pci/controller/pcie-xilinx-nwl.c   | 9 -
>  drivers/pci/controller/pcie-xilinx.c   | 7 ---
>  5 files changed, 38 deletions(-)

Applied to pci/controller-misc for v4.20, thanks.

Lorenzo

> diff --git a/drivers/pci/controller/pci-host-common.c 
> b/drivers/pci/controller/pci-host-common.c
> index d8f10451f273..c742881b5061 100644
> --- a/drivers/pci/controller/pci-host-common.c
> +++ b/drivers/pci/controller/pci-host-common.c
> @@ -58,9 +58,7 @@ static struct pci_config_window *gen_pci_init(struct device 
> *dev,
>  int pci_host_common_probe(struct platform_device *pdev,
> struct pci_ecam_ops *ops)
>  {
> - const char *type;
>   struct device *dev = &pdev->dev;
> - struct device_node *np = dev->of_node;
>   struct pci_host_bridge *bridge;
>   struct pci_config_window *cfg;
>   struct list_head resources;
> @@ -70,12 +68,6 @@ int pci_host_common_probe(struct platform_device *pdev,
>   if (!bridge)
>   return -ENOMEM;
>  
> - type = of_get_property(np, "device_type", NULL);
> - if (!type || strcmp(type, "pci")) {
> - dev_err(dev, "invalid \"device_type\" %s\n", type);
> - return -EINVAL;
> - }
> -
>   of_pci_check_probe_only();
>  
>   /* Parse and map our Configuration Space windows */
> diff --git a/drivers/pci/controller/pcie-cadence-host.c 
> b/drivers/pci/controller/pcie-cadence-host.c
> index ec394f6a19c8..97e251090b4f 100644
> --- a/drivers/pci/controller/pcie-cadence-host.c
> +++ b/drivers/pci/controller/pcie-cadence-host.c
> @@ -235,7 +235,6 @@ static int cdns_pcie_host_init(struct device *dev,
>  
>  static int cdns_pcie_host_probe(struct platform_device *pdev)
>  {
> - const char *type;
>   struct device *dev = &pdev->dev;
>   struct device_node *np = dev->of_node;
>   struct pci_host_bridge *bridge;
> @@ -268,12 +267,6 @@ static int cdns_pcie_host_probe(struct platform_device 
> *pdev)
>   rc->device_id = 0x;
>   of_property_read_u16(np, "device-id", &rc->device_id);
>  
> - type = of_get_property(np, "device_type", NULL);
> - if (!type || strcmp(type, "pci")) {
> - dev_err(dev, "invalid \"device_type\" %s\n", type);
> - return -EINVAL;
> - }
> -
>   res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "reg");
>   pcie->reg_base = devm_ioremap_resource(dev, res);
>   if (IS_ERR(pcie->reg_base)) {
> diff --git a/drivers/pci/controller/pcie-mobiveil.c 
> b/drivers/pci/controller/pcie-mobiveil.c
> index a939e8d31735..77052a0712d0 100644
> --- a/drivers/pci/controller/pcie-mobiveil.c
> +++ b/drivers/pci/controller/pcie-mobiveil.c
> @@ -301,13 +301,6 @@ static int mobiveil_pcie_parse_dt(struct mobiveil_pcie 
> *pcie)
>   struct platform_device *pdev = pcie->pdev;
>   struct device_node *node = dev->of_node;
>   struct resource *res;
> - const char *type;
> -
> - type = of_get_property(node, "device_type", NULL);
> - if (!type || strcmp(type, "pci")) {
> - dev_err(dev, "invalid \"device_type\" %s\n", type);
> - return -EINVAL;
> - }
>  
>   /* map config resource */
>   res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
> diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c 
> b/drivers/pci/controller/pcie-xilinx-nwl.c
> index fb32840ce8e6..81538d77f790 100644
> --- a/drivers/pci/controller/pcie-xilinx-nwl.c
> +++ b/drivers/pci/controller/pcie-xilinx-nwl.c
> @@ -777,16 +777,7 @@ static int nwl_pcie_parse_dt(struct nwl_pcie *pcie,
>struct platform_device *pdev)
>  {
>   struct device *dev = pcie->dev;
> - struct device_node *node = dev->of_node;
>   struct resource *res;
> - const char *type;
> -
> - /* Check for device type */
> - type = of_get_property(node, "device_type", NULL);
> - if (!type || strcmp(type, "pci")) {
> - dev_err(dev, "invalid \"device_type\" %s\n", type);
> - return -EINVAL;
> - }
>  
>   res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "breg");
>   pcie->breg_base = devm_ioremap_resource(dev, res);
> diff --git a/drivers/pci/controller/pcie-xilinx.c 
> b/drivers/pci/controller/pcie-xilinx.c

  1   2   3   4   5   6   7   8   >