Re: [PATCH v14 01/15] selftests/powerpc: Add more SPR numbers, TM & VMX instructions to 'reg.h'

2016-09-13 Thread Cyril Bur
On Mon, 2016-09-12 at 15:33 +0800, wei.guo.si...@gmail.com wrote:
> From: Anshuman Khandual 
> 
> This patch adds SPR number for TAR, PPR, DSCR special
> purpose registers. It also adds TM, VSX, VMX related
> instructions which will then be used by patches later
> in the series.
> 
> Signed-off-by: Anshuman Khandual 
> Signed-off-by: Simon Guo 
> ---
>  tools/testing/selftests/powerpc/reg.h | 42
> ---
>  1 file changed, 39 insertions(+), 3 deletions(-)
> 
> diff --git a/tools/testing/selftests/powerpc/reg.h
> b/tools/testing/selftests/powerpc/reg.h
> index fddf368..fee7be9 100644
> --- a/tools/testing/selftests/powerpc/reg.h
> +++ b/tools/testing/selftests/powerpc/reg.h
> @@ -18,6 +18,19 @@
>  
>  #define mb() asm volatile("sync" : : : "memory");
>  
> +/* Vector Instructions */
> +#define VSX_XX1(xs, ra, rb)  (((xs) & 0x1f) << 21 | ((ra) <<
> 16) |  \
> +  ((rb) << 11) | (((xs) >> 5)))
> +#define STXVD2X(xs, ra, rb)  .long (0x7c000798 | VSX_XX1((xs),
> (ra), (rb)))
> +#define LXVD2X(xs, ra, rb)   .long (0x7c000698 | VSX_XX1((xs),
> (ra), (rb)))

Theres an instructions.h file in tools/testing/selftests/powerpc/ would
these be better suited there?

> +
> +/* TM instructions */
> +#define TBEGIN   ".long 0x7C00051D;"
> +#define TABORT   ".long 0x7C00071D;"
> +#define TEND ".long 0x7C00055D;"
> +#define TSUSPEND ".long 0x7C0005DD;"
> +#define TRESUME  ".long 0x7C2005DD;"
> +

These are only useful on old compilers that don't know about TM. For
selftests I would discourage creating these and using the actual
instructions, they are fairly well known today by most compilers.

>  #define SPRN_MMCR2 769
>  #define SPRN_MMCRA 770
>  #define SPRN_MMCR0 779
> @@ -51,10 +64,33 @@
>  #define SPRN_SDAR  781
>  #define SPRN_SIER  768
>  
> -#define SPRN_TEXASR 0x82
> +#define SPRN_TEXASR 0x82/* Transaction Exception and Status
> Register */
>  #define SPRN_TFIAR  0x81/* Transaction Failure Inst
> Addr*/
>  #define SPRN_TFHAR  0x80/* Transaction Failure Handler Addr
> */
> -#define TEXASR_FS   0x0800
> -#define SPRN_TAR0x32f
> +#define SPRN_TAR0x32f/* Target Address Register */
> +
> +#define SPRN_DSCR_PRIV 0x11  /* Privilege State DSCR */
> +#define SPRN_DSCR  0x03  /* Data Stream Control Register
> */
> +#define SPRN_PPR   896   /* Program Priority Register */
> +
> +/* TEXASR register bits */
> +#define TEXASR_FC0xFE00
> +#define TEXASR_FP0x0100
> +#define TEXASR_DA0x0080
> +#define TEXASR_NO0x0040
> +#define TEXASR_FO0x0020
> +#define TEXASR_SIC   0x0010
> +#define TEXASR_NTC   0x0008
> +#define TEXASR_TC0x0004
> +#define TEXASR_TIC   0x0002
> +#define TEXASR_IC0x0001
> +#define TEXASR_IFC   0x8000
> +#define TEXASR_ABT   0x0001
> +#define TEXASR_SPD   0x8000
> +#define TEXASR_HV0x2000
> +#define TEXASR_PR0x1000
> +#define TEXASR_FS0x0800
> +#define TEXASR_TE0x0400
> +#define TEXASR_ROT   0x0200
>  
>  #endif /* _SELFTESTS_POWERPC_REG_H */


Re: [PATCH v14 05/15] selftests/powerpc: Add ptrace tests for GPR/FPR registers in TM

2016-09-13 Thread Cyril Bur
On Mon, 2016-09-12 at 15:33 +0800, wei.guo.si...@gmail.com wrote:
> From: Anshuman Khandual 
> 
> This patch adds ptrace interface test for GPR/FPR registers
> inside TM context. This adds ptrace interface based helper
> functions related to checkpointed GPR/FPR access.
> 
> Signed-off-by: Anshuman Khandual 
> Signed-off-by: Simon Guo 
> ---
>  tools/testing/selftests/powerpc/ptrace/Makefile|   3 +-
>  .../selftests/powerpc/ptrace/ptrace-tm-gpr.c   | 296
> +
>  2 files changed, 298 insertions(+), 1 deletion(-)
>  create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-tm-
> gpr.c
> 
> diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile
> b/tools/testing/selftests/powerpc/ptrace/Makefile
> index 31e8e33..170683a 100644
> --- a/tools/testing/selftests/powerpc/ptrace/Makefile
> +++ b/tools/testing/selftests/powerpc/ptrace/Makefile
> @@ -1,8 +1,9 @@
> -TEST_PROGS := ptrace-ebb ptrace-gpr
> +TEST_PROGS := ptrace-ebb ptrace-gpr ptrace-tm-gpr
>  
>  all: $(TEST_PROGS)
>  CFLAGS += -m64
>  $(TEST_PROGS): ../harness.c ptrace.S ../utils.c ptrace.h
>  ptrace-ebb: ../pmu/event.c ../pmu/lib.c ../pmu/ebb/ebb_handler.S
> ../pmu/ebb/busy_loop.S
> +
>  clean:
>   rm -f $(TEST_PROGS) *.o
> diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
> b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
> new file mode 100644
> index 000..8417d04
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
> @@ -0,0 +1,296 @@
> +/*
> + * Ptrace test for GPR/FPR registers in TM context
> + *
> + * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +#include "ptrace.h"
> +#include "ptrace-gpr.h"
> +
> +/* Tracer and Tracee Shared Data */
> +int shm_id;
> +volatile unsigned long *cptr, *pptr;
> +
> +float a = FPR_1;
> +float b = FPR_2;
> +float c = FPR_3;
> +
> +void tm_gpr(void)
> +{
> + unsigned long gpr_buf[18];
> + unsigned long result, texasr;
> + float fpr_buf[32];
> +
> + printf("Starting the child\n");
> + cptr = (unsigned long *)shmat(shm_id, NULL, 0);
> +
> +trans:
> + cptr[1] = 0;
> + asm __volatile__(
> +
> + "li 14, %[gpr_1];"
> + "li 15, %[gpr_1];"
> + "li 16, %[gpr_1];"
> + "li 17, %[gpr_1];"
> + "li 18, %[gpr_1];"
> + "li 19, %[gpr_1];"
> + "li 20, %[gpr_1];"
> + "li 21, %[gpr_1];"
> + "li 22, %[gpr_1];"
> + "li 23, %[gpr_1];"
> + "li 24, %[gpr_1];"
> + "li 25, %[gpr_1];"
> + "li 26, %[gpr_1];"
> + "li 27, %[gpr_1];"
> + "li 28, %[gpr_1];"
> + "li 29, %[gpr_1];"
> + "li 30, %[gpr_1];"
> + "li 31, %[gpr_1];"
> +
> + "lfs 0, 0(%[flt_1]);"
> + "lfs 1, 0(%[flt_1]);"
> + "lfs 2, 0(%[flt_1]);"
> + "lfs 3, 0(%[flt_1]);"
> + "lfs 4, 0(%[flt_1]);"
> + "lfs 5, 0(%[flt_1]);"
> + "lfs 6, 0(%[flt_1]);"
> + "lfs 7, 0(%[flt_1]);"
> + "lfs 8, 0(%[flt_1]);"
> + "lfs 9, 0(%[flt_1]);"
> + "lfs 10, 0(%[flt_1]);"
> + "lfs 11, 0(%[flt_1]);"
> + "lfs 12, 0(%[flt_1]);"
> + "lfs 13, 0(%[flt_1]);"
> + "lfs 14, 0(%[flt_1]);"
> + "lfs 15, 0(%[flt_1]);"
> + "lfs 16, 0(%[flt_1]);"
> + "lfs 17, 0(%[flt_1]);"
> + "lfs 18, 0(%[flt_1]);"
> + "lfs 19, 0(%[flt_1]);"
> + "lfs 20, 0(%[flt_1]);"
> + "lfs 21, 0(%[flt_1]);"
> + "lfs 22, 0(%[flt_1]);"
> + "lfs 23, 0(%[flt_1]);"
> + "lfs 24, 0(%[flt_1]);"
> + "lfs 25, 0(%[flt_1]);"
> + "lfs 26, 0(%[flt_1]);"
> + "lfs 27, 0(%[flt_1]);"
> + "lfs 28, 0(%[flt_1]);"
> + "lfs 29, 0(%[flt_1]);"
> + "lfs 30, 0(%[flt_1]);"
> + "lfs 31, 0(%[flt_1]);"
> +

There was this in the previous patch? Can we consolidate?

> + "1: ;"
> + TBEGIN

tbegin. is probably fine

> + "beq 2f;"
> +
> + "li 14, %[gpr_2];"
> + "li 15, %[gpr_2];"
> + "li 16, %[gpr_2];"
> + "li 17, %[gpr_2];"
> + "li 18, %[gpr_2];"
> + "li 19, %[gpr_2];"
> + "li 20, %[gpr_2];"
> + "li 21, %[gpr_2];"
> + "li 22, %[gpr_2];"
> + "li 23, %[gpr_2];"
> + "li 24, %[gpr_2];"
> + "li 25, %[gpr_2];"
> + "li 26, %[gpr_2];"
> + "li 27, %[gpr_2];"
> + "li 28, %[gpr_2];"
> +  

Re: [PATCH v14 04/15] selftests/powerpc: Add ptrace tests for GPR/FPR registers

2016-09-13 Thread Cyril Bur
On Mon, 2016-09-12 at 15:33 +0800, wei.guo.si...@gmail.com wrote:
> From: Anshuman Khandual 
> 
> This patch adds ptrace interface test for GPR/FPR registers.
> This adds ptrace interface based helper functions related to
> GPR/FPR access and some assembly helper functions related to
> GPR/FPR registers.
> 
> Signed-off-by: Anshuman Khandual 
> Signed-off-by: Simon Guo 
> ---
>  tools/testing/selftests/powerpc/ptrace/Makefile|   3 +-
>  .../testing/selftests/powerpc/ptrace/ptrace-gpr.c  | 196
> +++
>  .../testing/selftests/powerpc/ptrace/ptrace-gpr.h  |  74 
>  tools/testing/selftests/powerpc/ptrace/ptrace.S| 131
> +
>  tools/testing/selftests/powerpc/ptrace/ptrace.h| 211
> +
>  5 files changed, 614 insertions(+), 1 deletion(-)
>  create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-
> gpr.c
>  create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-
> gpr.h
>  create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace.S
> 

[snip]

> new file mode 100644
> index 000..193beea
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/ptrace/ptrace.S
> @@ -0,0 +1,131 @@
> +/*
> + * Ptrace interface test helper assembly functions
> + *
> + * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +#include 
> +#include "../reg.h"
> +
> +
> +/* Non volatile GPR - unsigned long buf[18] */
> +FUNC_START(load_gpr)
> + ld  14, 0*8(3)
> + ld  15, 1*8(3)
> + ld  16, 2*8(3)
> + ld  17, 3*8(3)
> + ld  18, 4*8(3)
> + ld  19, 5*8(3)
> + ld  20, 6*8(3)
> + ld  21, 7*8(3)
> + ld  22, 8*8(3)
> + ld  23, 9*8(3)
> + ld  24, 10*8(3)
> + ld  25, 11*8(3)
> + ld  26, 12*8(3)
> + ld  27, 13*8(3)
> + ld  28, 14*8(3)
> + ld  29, 15*8(3)
> + ld  30, 16*8(3)
> + ld  31, 17*8(3)
> + blr
> +FUNC_END(load_gpr)
> +
> +FUNC_START(store_gpr)
> + std 14, 0*8(3)
> + std 15, 1*8(3)
> + std 16, 2*8(3)
> + std 17, 3*8(3)
> + std 18, 4*8(3)
> + std 19, 5*8(3)
> + std 20, 6*8(3)
> + std 21, 7*8(3)
> + std 22, 8*8(3)
> + std 23, 9*8(3)
> + std 24, 10*8(3)
> + std 25, 11*8(3)
> + std 26, 12*8(3)
> + std 27, 13*8(3)
> + std 28, 14*8(3)
> + std 29, 15*8(3)
> + std 30, 16*8(3)
> + std 31, 17*8(3)
> + blr
> +FUNC_END(store_gpr)
> +
> +/* Single Precision Float - float buf[32] */
> +FUNC_START(load_fpr)
> + lfs 0, 0*4(3)
> + lfs 1, 1*4(3)
> + lfs 2, 2*4(3)
> + lfs 3, 3*4(3)
> + lfs 4, 4*4(3)
> + lfs 5, 5*4(3)
> + lfs 6, 6*4(3)
> + lfs 7, 7*4(3)
> + lfs 8, 8*4(3)
> + lfs 9, 9*4(3)
> + lfs 10, 10*4(3)
> + lfs 11, 11*4(3)
> + lfs 12, 12*4(3)
> + lfs 13, 13*4(3)
> + lfs 14, 14*4(3)
> + lfs 15, 15*4(3)
> + lfs 16, 16*4(3)
> + lfs 17, 17*4(3)
> + lfs 18, 18*4(3)
> + lfs 19, 19*4(3)
> + lfs 20, 20*4(3)
> + lfs 21, 21*4(3)
> + lfs 22, 22*4(3)
> + lfs 23, 23*4(3)
> + lfs 24, 24*4(3)
> + lfs 25, 25*4(3)
> + lfs 26, 26*4(3)
> + lfs 27, 27*4(3)
> + lfs 28, 28*4(3)
> + lfs 29, 29*4(3)
> + lfs 30, 30*4(3)
> + lfs 31, 31*4(3)
> + blr
> +FUNC_END(load_fpr)
> +
> +FUNC_START(store_fpr)
> + stfs 0, 0*4(3)
> + stfs 1, 1*4(3)
> + stfs 2, 2*4(3)
> + stfs 3, 3*4(3)
> + stfs 4, 4*4(3)
> + stfs 5, 5*4(3)
> + stfs 6, 6*4(3)
> + stfs 7, 7*4(3)
> + stfs 8, 8*4(3)
> + stfs 9, 9*4(3)
> + stfs 10, 10*4(3)
> + stfs 11, 11*4(3)
> + stfs 12, 12*4(3)
> + stfs 13, 13*4(3)
> + stfs 14, 14*4(3)
> + stfs 15, 15*4(3)
> + stfs 16, 16*4(3)
> + stfs 17, 17*4(3)
> + stfs 18, 18*4(3)
> + stfs 19, 19*4(3)
> + stfs 20, 20*4(3)
> + stfs 21, 21*4(3)
> + stfs 22, 22*4(3)
> + stfs 23, 23*4(3)
> + stfs 24, 24*4(3)
> + stfs 25, 25*4(3)
> + stfs 26, 26*4(3)
> + stfs 27, 27*4(3)
> + stfs 28, 28*4(3)
> + stfs 29, 29*4(3)
> + stfs 30, 30*4(3)
> + stfs 31, 31*4(3)
> + blr
> +FUNC_END(store_fpr)

I wrote similar functions in math/fpu_asm.S perhaps it would be time
consolidate those and these into an fpu_asm.S file at a higher level,
TM related tests would benefit as well.

> diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace.h
> b/tools/testing/selftests/powerpc/ptrace/ptrace.h
> index fbf73ca..1019004 100644
> --- a/tools/testing/selftests/powerpc/ptrace/ptrace.h
> +++ b/tools/testing/selftests/powerpc/ptrace/ptrace.h
> @@ -157,6 +157,214 @@ fail:
>   return TEST_FAIL;
>  }
>  
[s

Re: [PATCH v14 07/15] selftests/powerpc: Add ptrace tests for TAR, PPR, DSCR registers

2016-09-13 Thread Cyril Bur
On Mon, 2016-09-12 at 15:33 +0800, wei.guo.si...@gmail.com wrote:
> From: Anshuman Khandual 
> 
> This patch adds ptrace interface test for TAR, PPR, DSCR
> registers. This also adds ptrace interface based helper
> functions related to TAR, PPR, DSCR register access.
> 
> Signed-off-by: Anshuman Khandual 
> Signed-off-by: Simon Guo 
> ---
>  tools/testing/selftests/powerpc/ptrace/Makefile|   3 +-
>  .../testing/selftests/powerpc/ptrace/ptrace-tar.c  | 159
> ++
>  .../testing/selftests/powerpc/ptrace/ptrace-tar.h  |  50 ++
>  tools/testing/selftests/powerpc/ptrace/ptrace.h| 181
> +
>  4 files changed, 392 insertions(+), 1 deletion(-)
>  create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-
> tar.c
>  create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-
> tar.h
> 

[snip]

> +
> +void tar(void)
> +{
> + unsigned long reg[3];
> + int ret;
> +
> + cptr = (int *)shmat(shm_id, NULL, 0);
> + printf("%-30s TAR: %u PPR: %lx DSCR: %u\n",
> + user_write, TAR_1, PPR_1, DSCR_1);
> +
> + mtspr(SPRN_TAR, TAR_1);
> + mtspr(SPRN_PPR, PPR_1);
> + mtspr(SPRN_DSCR, DSCR_1);
> +
> + cptr[2] = 1;
> +
> + /* Wait on parent */
> + while (!cptr[0]);
asm volatile("" ::: "memory");

> +
> + reg[0] = mfspr(SPRN_TAR);
> + reg[1] = mfspr(SPRN_PPR);
> + reg[2] = mfspr(SPRN_DSCR);
> +
> + printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
> + user_read, reg[0], reg[1], reg[2]);
> +
> + /* Unblock the parent now */
> + cptr[1] = 1;
> + shmdt((int *)cptr);
> +
> + ret = validate_tar_registers(reg, TAR_2, PPR_2, DSCR_2);
> + if (ret)
> + exit(1);
> + exit(0);
> +}
> +
> +int trace_tar(pid_t child)
> +{
> + unsigned long reg[3];
> + int ret;
> +
> + ret = start_trace(child);
> + if (ret)
> + return TEST_FAIL;
> +
> + ret = show_tar_registers(child, reg);
> + if (ret)
> + return TEST_FAIL;
> +
> + printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
> + ptrace_read_running, reg[0], reg[1],
> reg[2]);
> +
> + ret = validate_tar_registers(reg, TAR_1, PPR_1, DSCR_1);
> + if (ret)
> + return TEST_FAIL;
> +
> + ret = stop_trace(child);
> + if (ret)
> + return TEST_FAIL;
> +
> + return TEST_PASS;
> +}
> +
> +int trace_tar_write(pid_t child)
> +{
> + int ret;
> +
> + ret = start_trace(child);
> + if (ret)
> + return TEST_FAIL;
> +
> + ret = write_tar_registers(child, TAR_2, PPR_2, DSCR_2);
> + if (ret)
> + return TEST_FAIL;
> +
> + printf("%-30s TAR: %u PPR: %lx DSCR: %u\n",
> + ptrace_write_running, TAR_2, PPR_2, DSCR_2);
> +
> + ret = stop_trace(child);
> + if (ret)
> + return TEST_FAIL;
> +
> + return TEST_PASS;
> +}

More comments about calling TEST_FAIL(x)

> +
> +int ptrace_tar(void)
> +{
> + pid_t pid;
> + int ret, status;
> +
> + shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3,
> 0777|IPC_CREAT);
> + pid = fork();
> + if (pid < 0) {
> + perror("fork() failed");
> + return TEST_FAIL;
> + }
> +
> + if (pid == 0)
> + tar();
> +
> + if (pid) {
> 

[snip]


Re: [PATCH v14 15/15] selftests/powerpc: Fix a build issue

2016-09-13 Thread Cyril Bur
On Mon, 2016-09-12 at 15:33 +0800, wei.guo.si...@gmail.com wrote:
> From: Anshuman Khandual 
> 
> Fixes the following build failure -
> 
> cp_abort.c:90:3: error: ‘for’ loop initial declarations are only
> allowed in C99 or C11 mode
>    for (int i = 0; i < NUM_LOOPS; i++) {
>    ^
> cp_abort.c:90:3: note: use option -std=c99, -std=gnu99, -std=c11 or
> -std=gnu11 to compile your code
> cp_abort.c:97:3: error: ‘for’ loop initial declarations are only
> allowed in C99 or C11 mode
>    for (int i = 0; i < NUM_LOOPS; i++) {
> 
> Signed-off-by: Anshuman Khandual 
> Signed-off-by: Simon Guo 

Reviewed-by: Cyril Bur 

> ---
>  tools/testing/selftests/powerpc/context_switch/cp_abort.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git
> a/tools/testing/selftests/powerpc/context_switch/cp_abort.c
> b/tools/testing/selftests/powerpc/context_switch/cp_abort.c
> index 5a5b55a..1ce7dce 100644
> --- a/tools/testing/selftests/powerpc/context_switch/cp_abort.c
> +++ b/tools/testing/selftests/powerpc/context_switch/cp_abort.c
> @@ -67,7 +67,7 @@ int test_cp_abort(void)
>   /* 128 bytes for a full cache line */
>   char buf[128] __cacheline_aligned;
>   cpu_set_t cpuset;
> - int fd1[2], fd2[2], pid;
> + int fd1[2], fd2[2], pid, i;
>   char c;
>  
>   /* only run this test on a P9 or later */
> @@ -87,14 +87,14 @@ int test_cp_abort(void)
>   FAIL_IF(pid < 0);
>  
>   if (!pid) {
> - for (int i = 0; i < NUM_LOOPS; i++) {
> + for (i = 0; i < NUM_LOOPS; i++) {
>   FAIL_IF((write(fd1[WRITE_FD], &c, 1)) != 1);
>   FAIL_IF((read(fd2[READ_FD], &c, 1)) != 1);
>   /* A paste succeeds if CR0 EQ bit is set */
>   FAIL_IF(paste(buf) & 0x2000);
>   }
>   } else {
> - for (int i = 0; i < NUM_LOOPS; i++) {
> + for (i = 0; i < NUM_LOOPS; i++) {
>   FAIL_IF((read(fd1[READ_FD], &c, 1)) != 1);
>   copy(buf);
>   FAIL_IF((write(fd2[WRITE_FD], &c, 1) != 1));


Re: [PATCH v14 13/15] selftests/powerpc: Add ptrace tests for TM SPR registers

2016-09-13 Thread Cyril Bur
On Mon, 2016-09-12 at 15:33 +0800, wei.guo.si...@gmail.com wrote:
> From: Anshuman Khandual 
> 
> This patch adds ptrace interface test for TM SPR registers. This
> also adds ptrace interface based helper functions related to TM
> SPR registers access.
> 

I'm seeing this one fail a lot, it does occasionally succeed but fails
a lot on my test setup.

I use qemu on a power8 for most of my testing:
qemu-system-ppc64 --enable-kvm -machine pseries,accel=kvm,usb=off -m
4096 -realtime mlock=off -smp 4,sockets=1,cores=2,threads=2 -nographic
-vga none


> Signed-off-by: Anshuman Khandual 
> Signed-off-by: Simon Guo 
> ---
>  tools/testing/selftests/powerpc/ptrace/Makefile|   3 +-
>  .../selftests/powerpc/ptrace/ptrace-tm-spr.c   | 186
> +
>  tools/testing/selftests/powerpc/ptrace/ptrace.h|  35 
>  3 files changed, 223 insertions(+), 1 deletion(-)
>  create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-tm-
> spr.c
> 
> diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile
> b/tools/testing/selftests/powerpc/ptrace/Makefile
> index 797840a..f34670e 100644
> --- a/tools/testing/selftests/powerpc/ptrace/Makefile
> +++ b/tools/testing/selftests/powerpc/ptrace/Makefile
> @@ -1,7 +1,8 @@
>  TEST_PROGS := ptrace-ebb ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr
> \
>  ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx
> \
> -ptrace-tm-spd-vsx
> +ptrace-tm-spd-vsx ptrace-tm-spr
>  
> +include ../../lib.mk
>  
>  all: $(TEST_PROGS)
>  CFLAGS += -m64
> diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
> b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
> new file mode 100644
> index 000..2863070
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
> @@ -0,0 +1,186 @@
> +/*
> + * Ptrace test TM SPR registers
> + *
> + * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +#include "ptrace.h"
> +
> +/* Tracee and tracer shared data */
> +struct shared {
> + int flag;
> + struct tm_spr_regs regs;
> +};
> +unsigned long tfhar;
> +
> +int shm_id;
> +volatile struct shared *cptr, *pptr;
> +
> +int shm_id1;
> +volatile int *cptr1, *pptr1;
> +
> +#define TM_SCHED 0xde018c01
> +#define TM_KVM_SCHED 0xe001ac01
> +
> +int validate_tm_spr(struct tm_spr_regs *regs)
> +{
> + if (regs->tm_tfhar != tfhar)
> + return TEST_FAIL;
> +
> + if ((regs->tm_texasr != TM_SCHED) && (regs->tm_texasr !=
> TM_KVM_SCHED))
> + return TEST_FAIL;

The above condition fails, should this test try again if this condition
is true, rather than fail?

> +
> + if ((regs->tm_texasr == TM_KVM_SCHED) && (regs->tm_tfiar !=
> 0))
> + return TEST_FAIL;
> +
> + return TEST_PASS;
> +}
> +

[snip]



Re: [PATCH V4 2/2] powerpc/powernv : Add support for OPAL-OCC command/response interface

2017-06-21 Thread Cyril Bur
On Wed, 2017-06-21 at 13:36 +0530, Shilpasri G Bhat wrote:
> In P9, OCC (On-Chip-Controller) supports shared memory based
> commad-response interface. Within the shared memory there is an OPAL
> command buffer and OCC response buffer that can be used to send
> inband commands to OCC. This patch adds a platform driver to support
> the command/response interface between OCC and the host.
> 

Sorry I probably should have pointed out earlier that I don't really
understand the first patch or exactly what problem you're trying to
solve. I've left it ignored, feel free to explain what the idea is
there or hopefully someone who can see what you're trying to do can
step in.

As for this patch, just one thing.


> Signed-off-by: Shilpasri G Bhat 
> ---
> - Hold occ->cmd_in_progress in read()
> - Reset occ->rsp_consumed if copy_to_user() fails
> 
>  arch/powerpc/include/asm/opal-api.h|  41 +++-
>  arch/powerpc/include/asm/opal.h|   3 +
>  arch/powerpc/platforms/powernv/Makefile|   2 +-
>  arch/powerpc/platforms/powernv/opal-occ.c  | 313 
> +
>  arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
>  arch/powerpc/platforms/powernv/opal.c  |   8 +
>  6 files changed, 366 insertions(+), 2 deletions(-)
>  create mode 100644 arch/powerpc/platforms/powernv/opal-occ.c
> 

[snip]

> +
> +static ssize_t opal_occ_read(struct file *file, char __user *buf,
> +  size_t count, loff_t *ppos)
> +{
> + struct miscdevice *dev = file->private_data;
> + struct occ *occ = container_of(dev, struct occ, dev);
> + int rc;
> +
> + if (count < sizeof(*occ->rsp) + occ->rsp->size)
> + return -EINVAL;
> +
> + if (!atomic_cmpxchg(&occ->rsp_consumed, 1, 0))
> + return -EBUSY;
> +
> + if (atomic_cmpxchg(&occ->cmd_in_progress, 0, 1))
> + return -EBUSY;
> +

Personally I would have done these two checks the other way around, it
doesn't really matter which one you do first but what does matter is
that you undo the change you did in the first cmpxchg if the second
cmpxchg causes you do return.

In this case if cmd_in_progress then you'll have marked the response as
consumed...

> + rc = copy_to_user((void __user *)buf, occ->rsp,
> +   sizeof(occ->rsp) + occ->rsp->size);
> + if (rc) {
> + atomic_set(&occ->rsp_consumed, 1);
> + atomic_set(&occ->cmd_in_progress, 0);
> + pr_err("Failed to copy OCC response data to user\n");
> + return rc;
> + }
> +
> + atomic_set(&occ->cmd_in_progress, 0);
> + return sizeof(*occ->rsp) + occ->rsp->size;
> +}
> +

[snip]


Re: [PATCH V4 2/2] powerpc/powernv : Add support for OPAL-OCC command/response interface

2017-06-21 Thread Cyril Bur
On Thu, 2017-06-22 at 09:57 +0530, Shilpasri G Bhat wrote:
> Hi Cyril,
> 
> On 06/22/2017 06:28 AM, Cyril Bur wrote:
> > On Wed, 2017-06-21 at 13:36 +0530, Shilpasri G Bhat wrote:
> > > In P9, OCC (On-Chip-Controller) supports shared memory based
> > > commad-response interface. Within the shared memory there is an OPAL
> > > command buffer and OCC response buffer that can be used to send
> > > inband commands to OCC. This patch adds a platform driver to support
> > > the command/response interface between OCC and the host.
> > > 
> > 
> > Sorry I probably should have pointed out earlier that I don't really
> > understand the first patch or exactly what problem you're trying to
> > solve. I've left it ignored, feel free to explain what the idea is
> > there or hopefully someone who can see what you're trying to do can
> > step in.
> 
> Thanks for reviewing this patch.
> 
> For the first patch however, OCC expects a different request_id in the command
> interface every time OPAL is requesting a new command .
> 'opal_async_get_token_interruptible()' returns a free token from the
> 'opal_async_complete_map' which does not work for the above OCC requirement as
> we may end up getting the same token. Thus the first patch tries to get a new
> token excluding a token that was used for the last command.
> 

Oh yes I see because you're using the OPAL token to the opal call as
the OCC msg request_id. Is there any requirement that these two numbers
match?

Is the 'different' a per OCC or a global 'all OCC' requirement?

Couldn't you just have a counter in your occ struct in this patch that
you increment on each call?

I've looked through your skiboot patch and it looks like you use the
msg->request_id so that you know who to respond to - wouldn't be easier
to just pass the opal token through as an extra parameter to
OPAL_OCC_COMMAND and store it alongside the msg structure?

Apologies I shouldn't have overlooked this for so long.

> 
> > 
> > As for this patch, just one thing.
> > 
> > 
> > > Signed-off-by: Shilpasri G Bhat 
> > > ---
> > > - Hold occ->cmd_in_progress in read()
> > > - Reset occ->rsp_consumed if copy_to_user() fails
> > > 
> > >  arch/powerpc/include/asm/opal-api.h|  41 +++-
> > >  arch/powerpc/include/asm/opal.h|   3 +
> > >  arch/powerpc/platforms/powernv/Makefile|   2 +-
> > >  arch/powerpc/platforms/powernv/opal-occ.c  | 313 
> > > +
> > >  arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
> > >  arch/powerpc/platforms/powernv/opal.c  |   8 +
> > >  6 files changed, 366 insertions(+), 2 deletions(-)
> > >  create mode 100644 arch/powerpc/platforms/powernv/opal-occ.c
> > > 
> > 
> > [snip]
> > 
> > > +
> > > +static ssize_t opal_occ_read(struct file *file, char __user *buf,
> > > +  size_t count, loff_t *ppos)
> > > +{
> > > + struct miscdevice *dev = file->private_data;
> > > + struct occ *occ = container_of(dev, struct occ, dev);
> > > + int rc;
> > > +
> > > + if (count < sizeof(*occ->rsp) + occ->rsp->size)
> > > + return -EINVAL;
> > > +
> > > + if (!atomic_cmpxchg(&occ->rsp_consumed, 1, 0))
> > > + return -EBUSY;
> > > +
> > > + if (atomic_cmpxchg(&occ->cmd_in_progress, 0, 1))
> > > + return -EBUSY;
> > > +
> > 
> > Personally I would have done these two checks the other way around, it
> > doesn't really matter which one you do first but what does matter is
> > that you undo the change you did in the first cmpxchg if the second
> > cmpxchg causes you do return.
> > 
> > In this case if cmd_in_progress then you'll have marked the response as
> > consumed...
> 
> Here, if cmd_in_progress is set by some other thread doing a write() then it
> will set the 'rsp_consumed' to valid on successful command completion. If
> write() fails then we are doing a good thing here by not setting 
> 'rsp_consumed'
> so the user will not be able to read previous command's response.
> 

Yep, if you're happy I'm happy, I suppose there is a scenario where you
might lose a response.

read() and sees that there is a response, and marks it as consumed,
meanwhile a write grabs cmd_in_progress which causes the read()er to
return EBUSY but something fails before the write()er can actually send
an occ command 

Re: [PATCH 0/2] Quieten softlockup detector on virtualised kernels

2015-01-05 Thread Cyril Bur
On Mon, 2015-01-05 at 11:50 -0500, Don Zickus wrote:
> cc'ing Marcelo
> 
> On Mon, Dec 22, 2014 at 04:06:02PM +1100, Cyril Bur wrote:
> > When the hypervisor pauses a virtualised kernel the kernel will observe a 
> > jump
> > in timebase, this can cause spurious messages from the softlockup detector.
> > 
> > Whilst these messages are harmless, they are accompanied with a stack trace
> > which causes undue concern and more problematically the stack trace in the
> > guest has nothing to do with the observed problem and can only be 
> > misleading.
> > 
> > Futhermore, on POWER8 this is completely avoidable with the introduction of
> > the Virtual Time Base (VTB) register.
> 
> Hi Cyril,
> 
> Your solution seems simple and doesn't disturb the softlockup code as much
> as the x86 solution does.  The only small issue I had was the use of
> sched_clock instead of local_clock.  I keep forgetting the difference
> (unstable clock is the biggest reason I think).
My apologies there it appears I stuffed up, local_clock was used
initially in the softlockup code, I'll send a v2.

> Other than that, I am not the biggest fan of putting multiple virtual
> guest solutions for the same problem into the watchdog code.  I would
> prefer a common solution/framework to leverage.
Agreed.

> I have the x86 folks focusing on the steal_time stuff.  It started with
> KVM and I believe VMWare is working on utilizing it too (and maybe Xen).
I'm not sure I've ever seen this, could you please point me towards
something I can look at?

> Not sure if that is useful or could be incoporated into the power8 code.
> Though to be honest I am curious if the steal_time code could be ported to
> your solution as it seems the watchdog code could remove all the
> steal_time warts.
Happy to help sus out the situation here, again, if you could pass on
what the x86 guys are working on, thanks.


Thanks,

Cyril
> I have cc'd Marcelo into this discussion as he was the last person I
> remember talking with about this problem.
> 
> Cheers,
> Don


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] powerpc: add running_clock for powerpc to prevent spurious softlockup warnings

2015-01-05 Thread Cyril Bur
cc'd Martin Schwidefsky and Heiko Carstens.

On Mon, 2015-01-05 at 14:10 -0800, Andrew Morton wrote:
> On Mon, 22 Dec 2014 16:06:04 +1100 Cyril Bur  wrote:
> 
> > On POWER8 virtualised kernels the VTB register can be read to have a view of
> > time that only increases while the guest is running. This will prevent 
> > guests
> > from seeing time jump if a guest is paused for significant amounts of time.
> > 
> > On POWER7 and below virtualised kernels stolen time is subtracted from
> > sched_clock as a best effort approximation. This will not eliminate spurious
> > warnings in the case of a suspended guest but may reduce the occurance in 
> > the
> > case of softlockups due to host over commit.
> > 
> > Bare metal kernels should avoid reading the VTB as KVM does not restore sane
> > values when not executing. sched_clock is returned in this case.
> > 
> > --- a/arch/powerpc/kernel/time.c
> > +++ b/arch/powerpc/kernel/time.c
> > @@ -621,6 +621,30 @@ unsigned long long sched_clock(void)
> > return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
> >  }
> >  
> > +unsigned long long running_clock(void)
> 
> Non-kvm kernels don't need this code.  Is there some appropriate
> "#ifdef CONFIG_foo" we can wrap this in?
CONFIG_PSERIES would work, having said that typical compilation for a
powernv kernel almost always includes CONFIG_PSERIES (although it
doesn't need to)... still, your point is valid, will add in v2.
> 
> 
> > +{
> > +   /*
> > +* Don't read the VTB as a host since KVM does not switch in host 
> > timebase
> > +* into the VTB when it takes a guest off the CPU, reading the VTB would
> > +* result in reading 'last switched out' guest VTB.
> > +*/
> > +
> > +   if (firmware_has_feature(FW_FEATURE_LPAR)) {
> > +   if (cpu_has_feature(CPU_FTR_ARCH_207S))
> > +   return mulhdu(get_vtb() - boot_tb, tb_to_ns_scale) << 
> > tb_to_ns_shift;
> > +
> > +   /* This is a next best approximation without a VTB. */
> > +   return sched_clock() - 
> > cputime_to_nsecs(kcpustat_this_cpu->cpustat[CPUTIME_STEAL]);
> 
> Why is this result dependent on FW_FEATURE_LPAR?  It's all generic code.
Good point, the reason it ended up there is because I wanted to avoid
behaviour changes.
> 
> In fact the kernel/sched/clock.c default implementation of
> running_clock() could use this expression.  Would that be good or bad? :)
For power I'm almost certain it would be fine, on platforms which don't
do stolen time cpustat[CPUTIME_STEAL] should always be zero and if not
then the value should always be sane (although as mentioned in the
comment, not as accurate as using the VTB).

Putting it in the default implementation could cause behavioural changes
for x86 and s390, I would want their views on doing that.
> 
> > +   }
> > +
> > +   /*
> > +* On a host which doesn't do any virtualisation TB *should* equal VTB 
> > so
> > +* it makes no difference anyway.
> > +*/
> > +
> > +   return sched_clock();
> > +}
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/2] Quieten softlockup detector on virtualised kernels

2015-01-05 Thread Cyril Bur
On Mon, 2015-01-05 at 14:09 -0800, Andrew Morton wrote:
> On Mon, 22 Dec 2014 16:06:02 +1100 Cyril Bur  wrote:
> 
> > When the hypervisor pauses a virtualised kernel the kernel will observe a 
> > jump
> > in timebase, this can cause spurious messages from the softlockup detector.
> > 
> > Whilst these messages are harmless, they are accompanied with a stack trace
> > which causes undue concern and more problematically the stack trace in the
> > guest has nothing to do with the observed problem and can only be 
> > misleading.
> > 
> > Futhermore, on POWER8 this is completely avoidable with the introduction of
> > the Virtual Time Base (VTB) register.
> 
> Does this problem apply to other KVM implementations and to Xen?  If
> so, what would implementations of running_clock() for those look like? 
> If not, why not?
Yes the problem should appear on other KVM implementations, not really
sure about Xen but I don't see why the problem wouldn't crop up.

x86 do have a method for dealing with it in the softlockup detector,
they've added a check in the softlockup using a paravirtualised clock
where the guest can discover if it had been paused, Xen could be using
too.
It doesn't appear s390 do anything.

Thanks,

Cyril
> 
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/2] Quieten softlockup detector on virtualised kernels

2015-01-08 Thread Cyril Bur

On Tue, 2015-01-06 at 10:01 -0500, Don Zickus wrote:
> On Tue, Jan 06, 2015 at 10:53:35AM +1100, Cyril Bur wrote:
> > On Mon, 2015-01-05 at 11:50 -0500, Don Zickus wrote:
> > > cc'ing Marcelo
> > > 
> > > On Mon, Dec 22, 2014 at 04:06:02PM +1100, Cyril Bur wrote:
> > > > When the hypervisor pauses a virtualised kernel the kernel will observe 
> > > > a jump
> > > > in timebase, this can cause spurious messages from the softlockup 
> > > > detector.
> > > > 
> > > > Whilst these messages are harmless, they are accompanied with a stack 
> > > > trace
> > > > which causes undue concern and more problematically the stack trace in 
> > > > the
> > > > guest has nothing to do with the observed problem and can only be 
> > > > misleading.
> > > > 
> > > > Futhermore, on POWER8 this is completely avoidable with the 
> > > > introduction of
> > > > the Virtual Time Base (VTB) register.
> > > 
> > > Hi Cyril,
> > > 
> > > Your solution seems simple and doesn't disturb the softlockup code as much
> > > as the x86 solution does.  The only small issue I had was the use of
> > > sched_clock instead of local_clock.  I keep forgetting the difference
> > > (unstable clock is the biggest reason I think).
> > My apologies there it appears I stuffed up, local_clock was used
> > initially in the softlockup code, I'll send a v2.
> 
> Thanks!
> 
> > 
> > > Other than that, I am not the biggest fan of putting multiple virtual
> > > guest solutions for the same problem into the watchdog code.  I would
> > > prefer a common solution/framework to leverage.
> > Agreed.
> > 
> > > I have the x86 folks focusing on the steal_time stuff.  It started with
> > > KVM and I believe VMWare is working on utilizing it too (and maybe Xen).
> > I'm not sure I've ever seen this, could you please point me towards
> > something I can look at?
> 
> I am not too familar with it, but the kernel/watchdog.c code has calls to
> kvm_check_and_clear_guest_paused(), which is probably a good place to
> start.
> 
Ah yes that, I did initially have a look at what it does when I
undertook to solve the problem on power and I suppose the two solutions
are similar in that they both just use a virtualised time source. The
similarities stop there though, the paravirtualised clock that x86 uses
provides (as the name of the function implies) a 'was paused' flag.
Obviously the flag isn't something the vtb register on power8 can
provide and since we have a vtb, its preferable to use that.
Perhaps x86 can do something with running_clock?

Regards,

Cyril

> Cheers,
> Don
> 
> > 
> > > Not sure if that is useful or could be incoporated into the power8 code.
> > > Though to be honest I am curious if the steal_time code could be ported to
> > > your solution as it seems the watchdog code could remove all the
> > > steal_time warts.
> > Happy to help sus out the situation here, again, if you could pass on
> > what the x86 guys are working on, thanks.
> > 
> > 
> > Thanks,
> > 
> > Cyril
> > > I have cc'd Marcelo into this discussion as he was the last person I
> > > remember talking with about this problem.
> > > 
> > > Cheers,
> > > Don
> > 
> > 



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] powerpc: add running_clock for powerpc to prevent spurious softlockup warnings

2015-01-08 Thread Cyril Bur
On Wed, 2015-01-07 at 11:20 +0100, Martin Schwidefsky wrote:
> On Tue, 06 Jan 2015 13:44:01 +1100
> Cyril Bur  wrote:
> 
> > On Mon, 2015-01-05 at 14:10 -0800, Andrew Morton wrote:
> > > On Mon, 22 Dec 2014 16:06:04 +1100 Cyril Bur  wrote:
> > > 
> > > > On POWER8 virtualised kernels the VTB register can be read to have a 
> > > > view of
> > > > time that only increases while the guest is running. This will prevent 
> > > > guests
> > > > from seeing time jump if a guest is paused for significant amounts of 
> > > > time.
> > > > 
> > > > On POWER7 and below virtualised kernels stolen time is subtracted from
> > > > sched_clock as a best effort approximation. This will not eliminate 
> > > > spurious
> > > > warnings in the case of a suspended guest but may reduce the occurance 
> > > > in the
> > > > case of softlockups due to host over commit.
> > > > 
> > > > Bare metal kernels should avoid reading the VTB as KVM does not restore 
> > > > sane
> > > > values when not executing. sched_clock is returned in this case.
> > > > 
> > > > --- a/arch/powerpc/kernel/time.c
> > > > +++ b/arch/powerpc/kernel/time.c
> > > > @@ -621,6 +621,30 @@ unsigned long long sched_clock(void)
> > > > return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << 
> > > > tb_to_ns_shift;
> > > >  }
> > > >  
> > > > +unsigned long long running_clock(void)
> > > 
> > > Non-kvm kernels don't need this code.  Is there some appropriate
> > > "#ifdef CONFIG_foo" we can wrap this in?
> > CONFIG_PSERIES would work, having said that typical compilation for a
> > powernv kernel almost always includes CONFIG_PSERIES (although it
> > doesn't need to)... still, your point is valid, will add in v2.
> > > 
> > > 
> > > > +{
> > > > +   /*
> > > > +* Don't read the VTB as a host since KVM does not switch in 
> > > > host timebase
> > > > +* into the VTB when it takes a guest off the CPU, reading the 
> > > > VTB would
> > > > +* result in reading 'last switched out' guest VTB.
> > > > +*/
> > > > +
> > > > +   if (firmware_has_feature(FW_FEATURE_LPAR)) {
> > > > +   if (cpu_has_feature(CPU_FTR_ARCH_207S))
> > > > +   return mulhdu(get_vtb() - boot_tb, 
> > > > tb_to_ns_scale) << tb_to_ns_shift;
> > > > +
> > > > +   /* This is a next best approximation without a VTB. */
> > > > +   return sched_clock() - 
> > > > cputime_to_nsecs(kcpustat_this_cpu->cpustat[CPUTIME_STEAL]);
> > > 
> > > Why is this result dependent on FW_FEATURE_LPAR?  It's all generic code.
> > Good point, the reason it ended up there is because I wanted to avoid
> > behaviour changes.
> > > 
> > > In fact the kernel/sched/clock.c default implementation of
> > > running_clock() could use this expression.  Would that be good or bad? :)
> > For power I'm almost certain it would be fine, on platforms which don't
> > do stolen time cpustat[CPUTIME_STEAL] should always be zero and if not
> > then the value should always be sane (although as mentioned in the
> > comment, not as accurate as using the VTB).
> > 
> > Putting it in the default implementation could cause behavioural changes
> > for x86 and s390, I would want their views on doing that.
> 
> I would prefer to make sched_clock do all the work. We have been thinking
> about steal time vs sched_clock as well, our solution would be to exchange
> the time source. Right now sched_clock is based on the TOD clock, the code
> that takes steal time into account would use the CPU timer instead.
> With the subtraction of kcpustat_this_cpu->cpustat[CPUTIME_STEAL] in
> common code we would have to add the same value in the sched_clock
> implementation as the steal time is already included in the CPU timer
> deltas.

Thanks for the quick reply Martin,
Sound like you've got ideas and while I didn't really grasp all of that,
I gather we best leave the common code as is.
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 0/2] Quieten softlockup detector on virtualised kernels

2015-01-08 Thread Cyril Bur
When the hypervisor pauses a virtualised kernel the kernel will observe a jump
in timebase, this can cause spurious messages from the softlockup detector.

Whilst these messages are harmless, they are accompanied with a stack trace
which causes undue concern and more problematically the stack trace in the
guest has nothing to do with the observed problem and can only be misleading.

Futhermore, on POWER8 this is completely avoidable with the introduction of
the Virtual Time Base (VTB) register.

V2:
Remove the export of running_clock
Added #ifdef CONFIG_PPC_PSERIES and optimised the non lpar + vtb cases.
Replaced the use of sched_clock_with local_clock it was used originally 
in
the softlockup detector.

Cyril Bur (2):
  Add another clock for use with the soft lockup watchdog.
  powerpc: add running_clock for powerpc to prevent spurious softlockup
warnings

 arch/powerpc/kernel/time.c | 32 
 include/linux/sched.h  |  1 +
 kernel/sched/clock.c   | 13 +
 kernel/watchdog.c  |  2 +-
 4 files changed, 47 insertions(+), 1 deletion(-)

-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 2/2] powerpc: add running_clock for powerpc to prevent spurious softlockup warnings

2015-01-08 Thread Cyril Bur
On POWER8 virtualised kernels the VTB register can be read to have a view of
time that only increases while the guest is running. This will prevent guests
from seeing time jump if a guest is paused for significant amounts of time.

On POWER7 and below virtualised kernels stolen time is subtracted from
local_clock as a best effort approximation. This will not eliminate spurious
warnings in the case of a suspended guest but may reduce the occurance in the
case of softlockups due to host over commit.

Bare metal kernels should avoid reading the VTB as KVM does not restore sane
values when not executing, the approxmation is fine as host kernels won't
observe any stolen time.

Signed-off-by: Cyril Bur 
---
V2:
   Replaced the use of sched_clock_with local_clock it was used originally in
the softlockup detector.
   Added #ifdef CONFIG_PPC_PSERIES and optimised the non lpar + vtb cases.

---
 arch/powerpc/kernel/time.c | 32 
 1 file changed, 32 insertions(+)

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index fa7c4f1..fd35e5b 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -621,6 +621,38 @@ unsigned long long sched_clock(void)
return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
 }
 
+
+#ifdef CONFIG_PPC_PSERIES
+
+/*
+ * Running clock - attempts to give a view of time passing for a virtualised
+ * kernels.
+ * Uses the VTB register if available otherwise a next best guess.
+ */
+unsigned long long running_clock(void)
+{
+   /*
+* Don't read the VTB as a host since KVM does not switch in host 
timebase
+* into the VTB when it takes a guest off the CPU, reading the VTB would
+* result in reading 'last switched out' guest VTB.
+*
+* Host kernels are often compiled with CONFIG_PSERIES checked, it 
would be
+* unsafe to rely only on the #ifdef above.
+*/
+   if (firmware_has_feature(FW_FEATURE_LPAR) &&
+   cpu_has_feature(CPU_FTR_ARCH_207S))
+   return mulhdu(get_vtb() - boot_tb, tb_to_ns_scale) << 
tb_to_ns_shift;
+
+   /*
+* This is a next best approximation without a VTB.
+* On a host which is running bare metal there should never be any 
stolen
+* time and on a host which doesn't do any virtualisation TB *should* 
equal
+* VTB so it makes no difference anyway.
+*/
+   return local_clock() - 
cputime_to_nsecs(kcpustat_this_cpu->cpustat[CPUTIME_STEAL]);
+}
+#endif
+
 static int __init get_freq(char *name, int cells, unsigned long *val)
 {
struct device_node *cpu;
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 1/2] Add another clock for use with the soft lockup watchdog.

2015-01-08 Thread Cyril Bur
This permits the use of arch specific clocks for which virtualised kernels can
use their notion of 'running' time, not the elpased wall time which will
include host execution time.

Signed-off-by: Cyril Bur 
---
V2:
   Remove the export of running_clock
   Use local_clock instead of sched_clock as was initally used in the
softlockup detector

---
 include/linux/sched.h |  1 +
 kernel/sched/clock.c  | 13 +
 kernel/watchdog.c |  2 +-
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8db31ef..e400162 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2145,6 +2145,7 @@ extern unsigned long long notrace sched_clock(void);
  */
 extern u64 cpu_clock(int cpu);
 extern u64 local_clock(void);
+extern u64 running_clock(void);
 extern u64 sched_clock_cpu(int cpu);
 
 
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index c27e4f8..c0a2051 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -420,3 +420,16 @@ u64 local_clock(void)
 
 EXPORT_SYMBOL_GPL(cpu_clock);
 EXPORT_SYMBOL_GPL(local_clock);
+
+/*
+ * Running clock - returns the time that has elapsed while a guest has been
+ * running.
+ * On a guest this value should be local_clock minus the time the guest was
+ * suspended by the hypervisor (for any reason).
+ * On bare metal this function should return the same as local_clock.
+ * Architectures and sub-architectures can override this.
+ */
+u64 __weak running_clock(void)
+{
+   return local_clock();
+}
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 70bf118..3174bf8 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -154,7 +154,7 @@ static int get_softlockup_thresh(void)
  */
 static unsigned long get_timestamp(void)
 {
-   return local_clock() >> 30LL;  /* 2^30 ~= 10^9 */
+   return running_clock() >> 30LL;  /* 2^30 ~= 10^9 */
 }
 
 static void set_sample_period(void)
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] Add another clock for use with the soft lockup watchdog.

2014-11-30 Thread Cyril Bur
This permits the use of arch specific clocks for which virtualised kernels can
use their notion of 'running' time, not the elpased wall time which will
include host execution time.

Signed-off-by: Cyril Bur 
---
 include/linux/sched.h |  1 +
 kernel/sched/clock.c  | 14 ++
 kernel/watchdog.c |  2 +-
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5e344bb..12eb1d0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2101,6 +2101,7 @@ extern unsigned long long notrace sched_clock(void);
  */
 extern u64 cpu_clock(int cpu);
 extern u64 local_clock(void);
+extern u64 running_clock(void);
 extern u64 sched_clock_cpu(int cpu);
 
 
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index c27e4f8..c83af4f 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -74,6 +74,20 @@ unsigned long long __weak sched_clock(void)
 }
 EXPORT_SYMBOL_GPL(sched_clock);
 
+/*
+ * Running clock - returns the time that has elapsed while a guest has been
+ * running.
+ * On a guest this value should be sched_clock minus the time the
+ * guest was suspended by the hypervisor (for any reason).
+ * On bare metal this function should return the same as sched_clock.
+ * Architectures and sub-architectures can override this.
+ */
+unsigned long long __weak running_clock(void)
+{
+   return sched_clock();
+}
+EXPORT_SYMBOL_GPL(running_clock);
+
 __read_mostly int sched_clock_running;
 
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 70bf118..3174bf8 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -154,7 +154,7 @@ static int get_softlockup_thresh(void)
  */
 static unsigned long get_timestamp(void)
 {
-   return local_clock() >> 30LL;  /* 2^30 ~= 10^9 */
+   return running_clock() >> 30LL;  /* 2^30 ~= 10^9 */
 }
 
 static void set_sample_period(void)
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] powerpc: add running_clock for powerpc to prevent spurious softlockup warnings

2014-11-30 Thread Cyril Bur
On POWER8 virtualised kernels the VTB register can be read to have a view of
time that only increases while the guest is running. This will prevent guests
from seeing time jump if a guest is paused for significant amounts of time.

On POWER7 and below virtualised kernels stolen time is subtracted from
sched_clock as a best effort approximation. This will not eliminate spurious
warnings in the case of a suspended guest but may reduce the occurance in the
case of softlockups due to host over commit.

Bare metal kernels should avoid reading the VTB as KVM does not restore sane
values when not executing. sched_clock is returned in this case.

Signed-off-by: Cyril Bur 
---
 arch/powerpc/kernel/time.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 7505599..839aeae 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -621,6 +621,30 @@ unsigned long long sched_clock(void)
return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
 }
 
+unsigned long long running_clock(void)
+{
+   /*
+* Don't read the VTB as a host since KVM does not switch in host 
timebase
+* into the VTB when it takes a guest off the CPU, reading the VTB would
+* result in reading 'last switched out' guest VTB.
+*/
+
+   if (firmware_has_feature(FW_FEATURE_LPAR)) {
+   if (cpu_has_feature(CPU_FTR_ARCH_207S))
+   return mulhdu(get_vtb() - boot_tb, tb_to_ns_scale) << 
tb_to_ns_shift;
+
+   /* This is a next best approximation without a VTB. */
+   return sched_clock() - 
cputime_to_nsecs(kcpustat_this_cpu->cpustat[CPUTIME_STEAL]);
+   }
+
+   /*
+* On a host which doesn't do any virtualisation TB *should* equal VTB 
so
+* it makes no difference anyway.
+*/
+
+   return sched_clock();
+}
+
 static int __init get_freq(char *name, int cells, unsigned long *val)
 {
struct device_node *cpu;
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/2] Quieten softlockup detector on virtualised kernels

2014-11-30 Thread Cyril Bur
When the hypervisor pauses a virtualised kernel the kernel will observe a jump
in timebase, this can cause spurious messages from the softlockup detector.

Whilst these messages are harmless, they are accompanied with a stack trace
which causes undue concern and more problematically the stack trace in the
guest has nothing to do with the observed problem and can only be misleading.

Futhermore, on POWER8 this is completely avoidable with the introduction of
the Virtual Time Base (VTB) register.

Cyril Bur (2):
  Add another clock for use with the soft lockup watchdog.
  powerpc: add running_clock for powerpc to prevent spurious softlockup
warnings

 arch/powerpc/kernel/time.c | 24 
 include/linux/sched.h  |  1 +
 kernel/sched/clock.c   | 14 ++
 kernel/watchdog.c  |  2 +-
 4 files changed, 40 insertions(+), 1 deletion(-)

-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/2] Quieten softlockup detector on virtualised kernels

2014-12-21 Thread Cyril Bur
When the hypervisor pauses a virtualised kernel the kernel will observe a jump
in timebase, this can cause spurious messages from the softlockup detector.

Whilst these messages are harmless, they are accompanied with a stack trace
which causes undue concern and more problematically the stack trace in the
guest has nothing to do with the observed problem and can only be misleading.

Futhermore, on POWER8 this is completely avoidable with the introduction of
the Virtual Time Base (VTB) register.

Cyril Bur (2):
  Add another clock for use with the soft lockup watchdog.
  powerpc: add running_clock for powerpc to prevent spurious softlockup
warnings

 arch/powerpc/kernel/time.c | 24 
 include/linux/sched.h  |  1 +
 kernel/sched/clock.c   | 14 ++
 kernel/watchdog.c  |  2 +-
 4 files changed, 40 insertions(+), 1 deletion(-)

-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] powerpc: add running_clock for powerpc to prevent spurious softlockup warnings

2014-12-21 Thread Cyril Bur
On POWER8 virtualised kernels the VTB register can be read to have a view of
time that only increases while the guest is running. This will prevent guests
from seeing time jump if a guest is paused for significant amounts of time.

On POWER7 and below virtualised kernels stolen time is subtracted from
sched_clock as a best effort approximation. This will not eliminate spurious
warnings in the case of a suspended guest but may reduce the occurance in the
case of softlockups due to host over commit.

Bare metal kernels should avoid reading the VTB as KVM does not restore sane
values when not executing. sched_clock is returned in this case.

Signed-off-by: Cyril Bur 
---
 arch/powerpc/kernel/time.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index fa7c4f1..9ba13ec 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -621,6 +621,30 @@ unsigned long long sched_clock(void)
return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
 }
 
+unsigned long long running_clock(void)
+{
+   /*
+* Don't read the VTB as a host since KVM does not switch in host 
timebase
+* into the VTB when it takes a guest off the CPU, reading the VTB would
+* result in reading 'last switched out' guest VTB.
+*/
+
+   if (firmware_has_feature(FW_FEATURE_LPAR)) {
+   if (cpu_has_feature(CPU_FTR_ARCH_207S))
+   return mulhdu(get_vtb() - boot_tb, tb_to_ns_scale) << 
tb_to_ns_shift;
+
+   /* This is a next best approximation without a VTB. */
+   return sched_clock() - 
cputime_to_nsecs(kcpustat_this_cpu->cpustat[CPUTIME_STEAL]);
+   }
+
+   /*
+* On a host which doesn't do any virtualisation TB *should* equal VTB 
so
+* it makes no difference anyway.
+*/
+
+   return sched_clock();
+}
+
 static int __init get_freq(char *name, int cells, unsigned long *val)
 {
struct device_node *cpu;
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] Add another clock for use with the soft lockup watchdog.

2014-12-21 Thread Cyril Bur
This permits the use of arch specific clocks for which virtualised kernels can
use their notion of 'running' time, not the elpased wall time which will
include host execution time.

Signed-off-by: Cyril Bur 
---
 include/linux/sched.h |  1 +
 kernel/sched/clock.c  | 14 ++
 kernel/watchdog.c |  2 +-
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8db31ef..e400162 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2145,6 +2145,7 @@ extern unsigned long long notrace sched_clock(void);
  */
 extern u64 cpu_clock(int cpu);
 extern u64 local_clock(void);
+extern u64 running_clock(void);
 extern u64 sched_clock_cpu(int cpu);
 
 
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index c27e4f8..c83af4f 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -74,6 +74,20 @@ unsigned long long __weak sched_clock(void)
 }
 EXPORT_SYMBOL_GPL(sched_clock);
 
+/*
+ * Running clock - returns the time that has elapsed while a guest has been
+ * running.
+ * On a guest this value should be sched_clock minus the time the
+ * guest was suspended by the hypervisor (for any reason).
+ * On bare metal this function should return the same as sched_clock.
+ * Architectures and sub-architectures can override this.
+ */
+unsigned long long __weak running_clock(void)
+{
+   return sched_clock();
+}
+EXPORT_SYMBOL_GPL(running_clock);
+
 __read_mostly int sched_clock_running;
 
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 70bf118..3174bf8 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -154,7 +154,7 @@ static int get_softlockup_thresh(void)
  */
 static unsigned long get_timestamp(void)
 {
-   return local_clock() >> 30LL;  /* 2^30 ~= 10^9 */
+   return running_clock() >> 30LL;  /* 2^30 ~= 10^9 */
 }
 
 static void set_sample_period(void)
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 2/2] powerpc: add running_clock for powerpc to prevent spurious softlockup warnings

2015-02-04 Thread Cyril Bur
On Wed, 2015-02-04 at 11:42 +0100, Paul Bolle wrote:
> On Fri, 2015-01-09 at 14:34 +1100, Cyril Bur wrote:
> > On POWER8 virtualised kernels the VTB register can be read to have a view of
> > time that only increases while the guest is running. This will prevent 
> > guests
> > from seeing time jump if a guest is paused for significant amounts of time.
> > 
> > On POWER7 and below virtualised kernels stolen time is subtracted from
> > local_clock as a best effort approximation. This will not eliminate spurious
> > warnings in the case of a suspended guest but may reduce the occurance in 
> > the
> > case of softlockups due to host over commit.
> > 
> > Bare metal kernels should avoid reading the VTB as KVM does not restore sane
> > values when not executing, the approxmation is fine as host kernels won't
> > observe any stolen time.
> > 
> > Signed-off-by: Cyril Bur 
> > ---
> > V2:
> >Replaced the use of sched_clock_with local_clock it was used originally 
> > in
> > the softlockup detector.
> >Added #ifdef CONFIG_PPC_PSERIES and optimised the non lpar + vtb cases.
> 
> This became commit 3e5aba51e929 ("powerpc: add running_clock for powerpc
> to prevent spurious softlockup warnings") in today's linux-next (ie,
> next-20150204). I noticed because a script I use to check linux-next
> spotted a trivial issues with it.
> 
> > ---
> >  arch/powerpc/kernel/time.c | 32 
> >  1 file changed, 32 insertions(+)
> > 
> > diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
> > index fa7c4f1..fd35e5b 100644
> > --- a/arch/powerpc/kernel/time.c
> > +++ b/arch/powerpc/kernel/time.c
> > @@ -621,6 +621,38 @@ unsigned long long sched_clock(void)
> > return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
> >  }
> >  
> > +
> > +#ifdef CONFIG_PPC_PSERIES
> > +
> > +/*
> > + * Running clock - attempts to give a view of time passing for a 
> > virtualised
> > + * kernels.
> > + * Uses the VTB register if available otherwise a next best guess.
> > + */
> > +unsigned long long running_clock(void)
> > +{
> > +   /*
> > +* Don't read the VTB as a host since KVM does not switch in host 
> > timebase
> > +* into the VTB when it takes a guest off the CPU, reading the VTB would
> > +* result in reading 'last switched out' guest VTB.
> > +*
> > +* Host kernels are often compiled with CONFIG_PSERIES checked, it 
> > would be
> 
> You obviously wanted to use CONFIG_PPC_PSERIES here.
> 
Yep, sorry.

> Should I submit a trivial patch to fix that typo or do you prefer to do
> that yourself?
> 
Indeed a trivial fixup, I've pasted the hunk below. Andrew are you ok to
fold it in?

Thanks,

Cyril


diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index fd35e5b..770dc16 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -636,8 +636,8 @@ unsigned long long running_clock(void)
 * into the VTB when it takes a guest off the CPU, reading the VTB would
 * result in reading 'last switched out' guest VTB.
 *
-* Host kernels are often compiled with CONFIG_PSERIES checked, it 
would be
-* unsafe to rely only on the #ifdef above.
+* Host kernels are often compiled with CONFIG_PPC_PSERIES checked, it
+* would be unsafe to rely only on the #ifdef above.
 */
if (firmware_has_feature(FW_FEATURE_LPAR) &&
cpu_has_feature(CPU_FTR_ARCH_207S))
> Thanks,
> 
> 
> Paul Bolle
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 0/2] Quieten softlockup detector on virtualised kernels

2015-02-01 Thread Cyril Bur
Hi Andrew,

Could you please pick these patches up through your tree?

Thanks,

Cyril

On Fri, 2015-01-09 at 14:34 +1100, Cyril Bur wrote:
> When the hypervisor pauses a virtualised kernel the kernel will observe a jump
> in timebase, this can cause spurious messages from the softlockup detector.
> 
> Whilst these messages are harmless, they are accompanied with a stack trace
> which causes undue concern and more problematically the stack trace in the
> guest has nothing to do with the observed problem and can only be misleading.
> 
> Futhermore, on POWER8 this is completely avoidable with the introduction of
> the Virtual Time Base (VTB) register.
> 
> V2:
>   Remove the export of running_clock
>   Added #ifdef CONFIG_PPC_PSERIES and optimised the non lpar + vtb cases.
>   Replaced the use of sched_clock_with local_clock it was used originally 
> in
> the softlockup detector.
> 
> Cyril Bur (2):
>   Add another clock for use with the soft lockup watchdog.
>   powerpc: add running_clock for powerpc to prevent spurious softlockup
> warnings
> 
>  arch/powerpc/kernel/time.c | 32 
>  include/linux/sched.h  |  1 +
>  kernel/sched/clock.c   | 13 +
>  kernel/watchdog.c  |  2 +-
>  4 files changed, 47 insertions(+), 1 deletion(-)
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] powerpc: process.c: fix Kconfig typo

2016-10-26 Thread Cyril Bur
On Wed, 2016-10-26 at 16:52 +1100, Michael Ellerman wrote:
> Cyril Bur  writes:
> 
> > On Wed, 2016-10-05 at 07:57 +0200, Valentin Rothberg wrote:
> > > s/ALIVEC/ALTIVEC/
> > > 
> > 
> > Oops, nice catch
> > 
> > > Signed-off-by: Valentin Rothberg 
> > 
> > Reviewed-by: Cyril Bur 
> 
> How did we not notice? Sounds like we need a new selftest.
> 

Indeed...

Here is probably a good place to say why we didn't catch it and under
what circumstances this will have a negative effect.

If a thread performs and transaction with altivec and then gets
preempted for whatever reason, this bug may cause the kernel to not
reenable altivec when that thread runs again. This will result in an
altivec unavailable fault, when these faults happen inside a user
transaction the kernel has no choice but enable altivec and doom the
transaction.

The result is that transactions using altivec may get aborted more than
they should.

The difficulty in catching this with a selftest is my deliberate use of
the word may above. Optimisations to avoid FPU/altivec/VSX faults mean
that the kernel will always leave them on for 255 switches, this code
prevents the kernel turning it off if it got to the 256th switch (and
userspace was transactional)...


Cyril

> Looks like this should have:
> 
> Fixes: dc16b553c949 ("powerpc: Always restore FPU/VEC/VSX if hardware
> transactional memory in use")
> 
> 
> And I guess I need to start running checkkconfigsymbols.py on every
> commit.
> 
> cheers


Re: [PATCH] powerpc: process.c: fix Kconfig typo

2016-10-05 Thread Cyril Bur
On Wed, 2016-10-05 at 07:57 +0200, Valentin Rothberg wrote:
> s/ALIVEC/ALTIVEC/
> 

Oops, nice catch

> Signed-off-by: Valentin Rothberg 

Reviewed-by: Cyril Bur 

> ---
>  arch/powerpc/kernel/process.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/kernel/process.c
> b/arch/powerpc/kernel/process.c
> index 9e7c10fe205f..ce6dc61b15b2 100644
> --- a/arch/powerpc/kernel/process.c
> +++ b/arch/powerpc/kernel/process.c
> @@ -1012,7 +1012,7 @@ void restore_tm_state(struct pt_regs *regs)
>   /* Ensure that restore_math() will restore */
>   if (msr_diff & MSR_FP)
>   current->thread.load_fp = 1;
> -#ifdef CONFIG_ALIVEC
> +#ifdef CONFIG_ALTIVEC
>   if (cpu_has_feature(CPU_FTR_ALTIVEC) && msr_diff & MSR_VEC)
>   current->thread.load_vec = 1;
>  #endif


[PATCH] drivers/misc: Aspeed LPC control fix compile error and warning

2017-03-21 Thread Cyril Bur
pgprot_dmachoerent() is not defined on every architecture. Having
COMPILE_TEST set for the driver causes it to be compiled on
architectures which do not have pgprot_dmachoerent():
drivers/misc/aspeed-lpc-ctrl.c: In function 'aspeed_lpc_ctrl_mmap':
drivers/misc/aspeed-lpc-ctrl.c:51:9: error: implicit declaration of
function 'pgprot_dmacoherent' [-Werror=implicit-function-declaration]
prot = pgprot_dmacoherent(prot);

There are two possible solutions:
1. Remove COMPILE_TEST to ensure the driver is only compiled on ARM
2. Use pgprot_noncached() instead of pgprot_dmachoerent()

The first option results in less compile testing of the LPC control
driver which is undesirable.
The second option uses a function that is declared on all architectures
and therefore should always build. Currently there is no practical
difference between pgprot_noncached() and pgprot_dmachoerent() for the
aspeed chips that this driver is compatible with. The reason for
pgprot_dmachoerent() was that there may be chips made at some point in
the future that could include hardware that pgprot_dmachoerent() could
optimise for. As none of this hardware has even been announced there
isn't really a need for pgprot_dmachoerent().

Using pgprot_noncached() is completely correct and optimal for all
existing hardware on which the LPC control driver will run.

This commit also addresses that phys_addr_t should be printed using %pap
rather than %x:
In file included from include/linux/miscdevice.h:6:0,
 from drivers/misc/aspeed-lpc-ctrl.c:11:
drivers/misc/aspeed-lpc-ctrl.c: In function 'aspeed_lpc_ctrl_probe':
drivers/misc/aspeed-lpc-ctrl.c:232:17: warning: format '%x' expects
argument of type 'unsigned int', but argument 3 has type 'phys_addr_t
{aka long long unsigned int}' [-Wformat=]
dev_info(dev, "Loaded at 0x%08x (0x%08x)\n",

Signed-off-by: Cyril Bur 
---
 drivers/misc/aspeed-lpc-ctrl.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/aspeed-lpc-ctrl.c b/drivers/misc/aspeed-lpc-ctrl.c
index f6acbe1d9378..c654651a7b6d 100644
--- a/drivers/misc/aspeed-lpc-ctrl.c
+++ b/drivers/misc/aspeed-lpc-ctrl.c
@@ -48,7 +48,7 @@ static int aspeed_lpc_ctrl_mmap(struct file *file, struct 
vm_area_struct *vma)
return -EINVAL;
 
/* ast2400/2500 AHB accesses are not cache coherent */
-   prot = pgprot_dmacoherent(prot);
+   prot = pgprot_noncached(prot);
 
if (remap_pfn_range(vma, vma->vm_start,
(lpc_ctrl->mem_base >> PAGE_SHIFT) + vma->vm_pgoff,
@@ -229,8 +229,8 @@ static int aspeed_lpc_ctrl_probe(struct platform_device 
*pdev)
if (rc)
dev_err(dev, "Unable to register device\n");
else
-   dev_info(dev, "Loaded at 0x%08x (0x%08x)\n",
-   lpc_ctrl->mem_base, lpc_ctrl->mem_size);
+   dev_info(dev, "Loaded at %pap (0x%08x)\n",
+   &lpc_ctrl->mem_base, lpc_ctrl->mem_size);
 
return rc;
 }
-- 
2.12.0



[PATCH] drivers/misc: Aspeed LPC control fix format string warning

2017-03-22 Thread Cyril Bur
resource_size_t is a derivative of phys_addr_t and should also be
printed with %pap:
drivers/misc/aspeed-lpc-ctrl.c:232:17: warning: format '%x' expects
argument of type 'unsigned int', but argument 4 has type
'resource_size_t {aka long long unsigned int}' [-Wformat=] dev_info(dev,
"Loaded at %pap (0x%08x)\n",

Reported-by: Stephen Rothwell 
Signed-off-by: Cyril Bur 
---
 drivers/misc/aspeed-lpc-ctrl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/aspeed-lpc-ctrl.c b/drivers/misc/aspeed-lpc-ctrl.c
index c654651a7b6d..2f6bb2244be5 100644
--- a/drivers/misc/aspeed-lpc-ctrl.c
+++ b/drivers/misc/aspeed-lpc-ctrl.c
@@ -229,8 +229,8 @@ static int aspeed_lpc_ctrl_probe(struct platform_device 
*pdev)
if (rc)
dev_err(dev, "Unable to register device\n");
else
-   dev_info(dev, "Loaded at %pap (0x%08x)\n",
-   &lpc_ctrl->mem_base, lpc_ctrl->mem_size);
+   dev_info(dev, "Loaded at %pap (%pap)\n",
+   &lpc_ctrl->mem_base, &lpc_ctrl->mem_size);
 
return rc;
 }
-- 
2.12.1



Re: linux-next: build failure after merge of the char-misc tree

2017-03-20 Thread Cyril Bur
On Tue, 2017-03-21 at 11:18 +1100, Benjamin Herrenschmidt wrote:
> On Mon, 2017-03-20 at 13:23 +0100, Arnd Bergmann wrote:
> > On Mon, Mar 20, 2017 at 3:44 AM, Stephen Rothwell  
> > wrote:
> > > Hi all,
> > > 
> > > After merging the char-misc tree, today's linux-next build (x86_64
> > > allmodconfig) failed like this:
> > > 
> > > drivers/misc/aspeed-lpc-ctrl.c: In function 'aspeed_lpc_ctrl_mmap':
> > > drivers/misc/aspeed-lpc-ctrl.c:51:9: error: implicit declaration of 
> > > function 'pgprot_dmacoherent' [-Werror=implicit-function-declaration]
> > >   prot = pgprot_dmacoherent(prot);
> > 
> > A lot of other drivers (including /dev/mem) just use pgprot_noncached() or
> > pgprot_writecombine(), which would make the code portable and might be
> > what you want here as well.
> > 
> > pgprot_dmacoherent() is meant specifically for mapping RAM that is used
> > for DMA buffers that come from dma_alloc_coherent(), which doesn't seem
> > to be the case here.
> > 
> > What kind of address range is this really?
> 
> It's a piece of RAM that we reserve via a reserved region, which will
> be accessed by HW (sort-of-DMA, ie, the "host" system will access that
> using FW cycles on the LPC bus which we map to that reserved region of
> memory).
> 
> Joel, Cyril, can you send a 1-liner patch to change that to
> pgprot_noncached() ?
> 

Sure. Just to be clear - we want to keep COMPILE_TEST in kconfig?

Also I can't help but notice this: 
https://lists.ozlabs.org/pipermail/openbmc/2017-January/006219.html
[v3 of the series]
vs
https://lists.ozlabs.org/pipermail/openbmc/2017-February/006462.html
[v4 of the series]


> Cheers,
> Ben.
> 
> > > drivers/misc/aspeed-lpc-ctrl.c:51:7: error: incompatible types when 
> > > assigning to type 'pgprot_t {aka struct pgprot}' from type 'int'
> > >   prot = pgprot_dmacoherent(prot);
> > >    ^
> > > In file included from include/linux/miscdevice.h:6:0,
> > >  from drivers/misc/aspeed-lpc-ctrl.c:11:
> > > drivers/misc/aspeed-lpc-ctrl.c: In function 'aspeed_lpc_ctrl_probe':
> > > drivers/misc/aspeed-lpc-ctrl.c:232:17: warning: format '%x' expects 
> > > argument of type 'unsigned int', but argument 3 has type 'phys_addr_t 
> > > {aka long long unsigned int}' [-Wformat=]
> > >    dev_info(dev, "Loaded at 0x%08x (0x%08x)\n",
> > 
> > This should just use the "%pap" for printing a phys_addr_t, otherwise we
> > get the same warning on ARM in some configurations.

Thanks Arnd, I'll address that too.

> > 
> >   Arnd


Re: [PATCH v6] drivers/misc: Add Aspeed LPC control driver

2017-03-01 Thread Cyril Bur
On Wed, 2017-02-22 at 16:11 +1100, Suraj Jitindar Singh wrote:
> On Fri, 2017-02-17 at 14:28 +1100, Cyril Bur wrote:
> 
> I may be too late, but see below...
> > In order to manage server systems, there is typically another
> > processor
> > known as a BMC (Baseboard Management Controller) which is responsible
> > for powering the server and other various elements, sometimes fans,
> > often the system flash.
> > 
> > The Aspeed BMC family which is what is used on OpenPOWER machines and
> > a
> > number of x86 as well is typically connected to the host via an LPC
> > (Low Pin Count) bus (among others).
> > 
> > The LPC bus is an ISA bus on steroids. It's generally used by the
> > BMC chip to provide the host with access to the system flash (via
> > MEM/FW
> > cycles) that contains the BIOS or other host firmware along with a
> > number of SuperIO-style IOs (via IO space) such as UARTs, IPMI
> > controllers.
> > 
> > On the BMC chip side, this is all configured via a bunch of registers
> > whose content is related to a given policy of what devices are
> > exposed
> > at a per system level, which is system/vendor specific, so we don't
> > want
> > to bolt that into the BMC kernel. This started with a need to provide
> > something nicer than /dev/mem for user space to configure these
> > things.
> > 
> > One important aspect of the configuration is how the MEM/FW space is
> > exposed to the host (ie, the x86 or POWER). Some registers in that
> > bridge can define a window remapping all or portion of the LPC MEM/FW
> > space to a portion of the BMC internal bus, with no specific limits
> > imposed in HW.
> > 
> > I think it makes sense to ensure that this window is configured by a
> > kernel driver that can apply some serious sanity checks on what it is
> > configured to map.
> > 
> > In practice, user space wants to control this by flipping the mapping
> > between essentially two types of portions of the BMC address space:
> > 
> >    - The flash space. This is a region of the BMC MMIO space that
> > more/less directly maps the system flash (at least for reads, writes
> > are somewhat more complicated).
> > 
> >    - One (or more) reserved area(s) of the BMC physical memory.
> > 
> > The latter is needed for a number of things, such as avoiding letting
> > the host manipulate the innards of the BMC flash controller via some
> > evil backdoor, we want to do flash updates by routing the window to a
> > portion of memory (under control of a mailbox protocol via some
> > separate set of registers) which the host can use to write new data
> > in
> > bulk and then request the BMC to flash it. There are other uses, such
> > as allowing the host to boot from an in-memory flash image rather
> > than
> > the one in flash (very handy for continuous integration and test, the
> > BMC can just download new images).
> > 
> > It is important to note that due to the way the Aspeed chip lets the
> > kernel configure the mapping between host LPC addresses and BMC ram
> > addresses the offset within the window must be a multiple of size.
> > Not doing so will fragment the accessible space rather than simply
> > moving 'zero' upwards. This is caused by the nature of HICR8 being a
> > mask and the way host LPC addresses are translated.
> > 
> > Signed-off-by: Cyril Bur 
> > ---
> > v2:
> >    Removed unused functions
> >    Removed use of access_ok()
> >    All input is evil
> >    Reworked the interface as per Benjamin Herrenschmidts vision
> > v3:
> >    Removed 'default y' from Kconfig
> >    Reordered ioctl() struct fields
> >    Reworeded some comments
> > v4:
> >    Reorder ioctl() struct fields (again)
> > v5:
> >    Style cleanups
> >    Use of_address_to_resource()
> >    Document ioctl structure fields
> > v6:
> >    Check and fail ioctl() if flags field non-zero
> > 
> >  drivers/misc/Kconfig |   8 ++
> >  drivers/misc/Makefile|   1 +
> >  drivers/misc/aspeed-lpc-ctrl.c   | 267
> > +++
> >  include/uapi/linux/aspeed-lpc-ctrl.h |  60 
> >  4 files changed, 336 insertions(+)
> >  create mode 100644 drivers/misc/aspeed-lpc-ctrl.c
> >  create mode 100644 include/uapi/linux/aspeed-lpc-ctrl.h
> > 
> > diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
> > index 64971baf11fa..7dc4c369012f 100644
> > --- a/drivers/misc/Kco

[PATCH v5] drivers/misc: Add Aspeed LPC control driver

2017-02-13 Thread Cyril Bur
In order to manage server systems, there is typically another processor
known as a BMC (Baseboard Management Controller) which is responsible
for powering the server and other various elements, sometimes fans,
often the system flash.

The Aspeed BMC family which is what is used on OpenPOWER machines and a
number of x86 as well is typically connected to the host via an LPC
(Low Pin Count) bus (among others).

The LPC bus is an ISA bus on steroids. It's generally used by the
BMC chip to provide the host with access to the system flash (via MEM/FW
cycles) that contains the BIOS or other host firmware along with a
number of SuperIO-style IOs (via IO space) such as UARTs, IPMI
controllers.

On the BMC chip side, this is all configured via a bunch of registers
whose content is related to a given policy of what devices are exposed
at a per system level, which is system/vendor specific, so we don't want
to bolt that into the BMC kernel. This started with a need to provide
something nicer than /dev/mem for user space to configure these things.

One important aspect of the configuration is how the MEM/FW space is
exposed to the host (ie, the x86 or POWER). Some registers in that
bridge can define a window remapping all or portion of the LPC MEM/FW
space to a portion of the BMC internal bus, with no specific limits
imposed in HW.

I think it makes sense to ensure that this window is configured by a
kernel driver that can apply some serious sanity checks on what it is
configured to map.

In practice, user space wants to control this by flipping the mapping
between essentially two types of portions of the BMC address space:

   - The flash space. This is a region of the BMC MMIO space that
more/less directly maps the system flash (at least for reads, writes
are somewhat more complicated).

   - One (or more) reserved area(s) of the BMC physical memory.

The latter is needed for a number of things, such as avoiding letting
the host manipulate the innards of the BMC flash controller via some
evil backdoor, we want to do flash updates by routing the window to a
portion of memory (under control of a mailbox protocol via some
separate set of registers) which the host can use to write new data in
bulk and then request the BMC to flash it. There are other uses, such
as allowing the host to boot from an in-memory flash image rather than
the one in flash (very handy for continuous integration and test, the
BMC can just download new images).

It is important to note that due to the way the Aspeed chip lets the
kernel configure the mapping between host LPC addresses and BMC ram
addresses the offset within the window must be a multiple of size.
Not doing so will fragment the accessible space rather than simply
moving 'zero' upwards. This is caused by the nature of HICR8 being a
mask and the way host LPC addresses are translated.

Signed-off-by: Cyril Bur 
---
v2:
   Removed unused functions
   Removed use of access_ok()
   All input is evil
   Reworked the interface as per Benjamin Herrenschmidts vision
v3:
   Removed 'default y' from Kconfig
   Reordered ioctl() struct fields
   Reworeded some comments
v4:
   Reorder ioctl() struct fields (again)
v5:
   Style cleanups
   Use of_address_to_resource()
   Document ioctl structure fields


 drivers/misc/Kconfig |   8 ++
 drivers/misc/Makefile|   1 +
 drivers/misc/aspeed-lpc-ctrl.c   | 264 +++
 include/uapi/linux/aspeed-lpc-ctrl.h |  60 
 4 files changed, 333 insertions(+)
 create mode 100644 drivers/misc/aspeed-lpc-ctrl.c
 create mode 100644 include/uapi/linux/aspeed-lpc-ctrl.h

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 64971baf11fa..7dc4c369012f 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -766,6 +766,14 @@ config PANEL_BOOT_MESSAGE
  An empty message will only clear the display at driver init time. Any 
other
  printf()-formatted message is valid with newline and escape codes.
 
+config ASPEED_LPC_CTRL
+   depends on (ARCH_ASPEED || COMPILE_TEST) && REGMAP && MFD_SYSCON
+   tristate "Aspeed ast2400/2500 HOST LPC to BMC bridge control"
+   ---help---
+ Control Aspeed ast2400/2500 HOST LPC to BMC mappings through
+ ioctl()s, the driver also provides a read/write interface to a BMC ram
+ region where the host LPC read/write region can be buffered.
+
 source "drivers/misc/c2port/Kconfig"
 source "drivers/misc/eeprom/Kconfig"
 source "drivers/misc/cb710/Kconfig"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 31983366090a..de1925a9c80b 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -53,6 +53,7 @@ obj-$(CONFIG_ECHO)+= echo/
 obj-$(CONFIG_VEXPRESS_SYSCFG)  += vexpress-syscfg.o
 obj-$(CONFIG_CXL_BASE) += cxl/
 obj-$(CONFIG_PANEL) += panel.o
+obj-$(CONFIG_ASPEED_LP

[PATCH v6] drivers/misc: Add Aspeed LPC control driver

2017-02-16 Thread Cyril Bur
In order to manage server systems, there is typically another processor
known as a BMC (Baseboard Management Controller) which is responsible
for powering the server and other various elements, sometimes fans,
often the system flash.

The Aspeed BMC family which is what is used on OpenPOWER machines and a
number of x86 as well is typically connected to the host via an LPC
(Low Pin Count) bus (among others).

The LPC bus is an ISA bus on steroids. It's generally used by the
BMC chip to provide the host with access to the system flash (via MEM/FW
cycles) that contains the BIOS or other host firmware along with a
number of SuperIO-style IOs (via IO space) such as UARTs, IPMI
controllers.

On the BMC chip side, this is all configured via a bunch of registers
whose content is related to a given policy of what devices are exposed
at a per system level, which is system/vendor specific, so we don't want
to bolt that into the BMC kernel. This started with a need to provide
something nicer than /dev/mem for user space to configure these things.

One important aspect of the configuration is how the MEM/FW space is
exposed to the host (ie, the x86 or POWER). Some registers in that
bridge can define a window remapping all or portion of the LPC MEM/FW
space to a portion of the BMC internal bus, with no specific limits
imposed in HW.

I think it makes sense to ensure that this window is configured by a
kernel driver that can apply some serious sanity checks on what it is
configured to map.

In practice, user space wants to control this by flipping the mapping
between essentially two types of portions of the BMC address space:

   - The flash space. This is a region of the BMC MMIO space that
more/less directly maps the system flash (at least for reads, writes
are somewhat more complicated).

   - One (or more) reserved area(s) of the BMC physical memory.

The latter is needed for a number of things, such as avoiding letting
the host manipulate the innards of the BMC flash controller via some
evil backdoor, we want to do flash updates by routing the window to a
portion of memory (under control of a mailbox protocol via some
separate set of registers) which the host can use to write new data in
bulk and then request the BMC to flash it. There are other uses, such
as allowing the host to boot from an in-memory flash image rather than
the one in flash (very handy for continuous integration and test, the
BMC can just download new images).

It is important to note that due to the way the Aspeed chip lets the
kernel configure the mapping between host LPC addresses and BMC ram
addresses the offset within the window must be a multiple of size.
Not doing so will fragment the accessible space rather than simply
moving 'zero' upwards. This is caused by the nature of HICR8 being a
mask and the way host LPC addresses are translated.

Signed-off-by: Cyril Bur 
---
v2:
   Removed unused functions
   Removed use of access_ok()
   All input is evil
   Reworked the interface as per Benjamin Herrenschmidts vision
v3:
   Removed 'default y' from Kconfig
   Reordered ioctl() struct fields
   Reworeded some comments
v4:
   Reorder ioctl() struct fields (again)
v5:
   Style cleanups
   Use of_address_to_resource()
   Document ioctl structure fields
v6:
   Check and fail ioctl() if flags field non-zero

 drivers/misc/Kconfig |   8 ++
 drivers/misc/Makefile|   1 +
 drivers/misc/aspeed-lpc-ctrl.c   | 267 +++
 include/uapi/linux/aspeed-lpc-ctrl.h |  60 
 4 files changed, 336 insertions(+)
 create mode 100644 drivers/misc/aspeed-lpc-ctrl.c
 create mode 100644 include/uapi/linux/aspeed-lpc-ctrl.h

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 64971baf11fa..7dc4c369012f 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -766,6 +766,14 @@ config PANEL_BOOT_MESSAGE
  An empty message will only clear the display at driver init time. Any 
other
  printf()-formatted message is valid with newline and escape codes.
 
+config ASPEED_LPC_CTRL
+   depends on (ARCH_ASPEED || COMPILE_TEST) && REGMAP && MFD_SYSCON
+   tristate "Aspeed ast2400/2500 HOST LPC to BMC bridge control"
+   ---help---
+ Control Aspeed ast2400/2500 HOST LPC to BMC mappings through
+ ioctl()s, the driver also provides a read/write interface to a BMC ram
+ region where the host LPC read/write region can be buffered.
+
 source "drivers/misc/c2port/Kconfig"
 source "drivers/misc/eeprom/Kconfig"
 source "drivers/misc/cb710/Kconfig"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 31983366090a..de1925a9c80b 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -53,6 +53,7 @@ obj-$(CONFIG_ECHO)+= echo/
 obj-$(CONFIG_VEXPRESS_SYSCFG)  += vexpress-syscfg.o
 obj-$(CONFIG_CXL_BASE) += cxl/
 obj-$(CONFIG_PANEL

[PATCH v2] drivers/mailbox: Add Aspeed mailbox driver

2017-02-07 Thread Cyril Bur
This provides access to the mbox registers on the ast2400 and ast2500
SoCs.

This driver allows arbitrary reads and writes to the 16 data registers as
the other end may have configured the mbox hardware to provide an
interrupt when a specific register gets written to.

Signed-off-by: Cyril Bur 
---
V2:
   s/ASpeed/Aspeed/
   Reword Kconfig options
   Use tristate for config symbol

 drivers/mailbox/Kconfig   |   8 +
 drivers/mailbox/Makefile  |   2 +
 drivers/mailbox/aspeed-mbox.c | 334 ++
 3 files changed, 344 insertions(+)
 create mode 100644 drivers/mailbox/aspeed-mbox.c

diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig
index ceff415f201c..e24044d5c219 100644
--- a/drivers/mailbox/Kconfig
+++ b/drivers/mailbox/Kconfig
@@ -152,4 +152,12 @@ config BCM_PDC_MBOX
  Mailbox implementation for the Broadcom PDC ring manager,
  which provides access to various offload engines on Broadcom
  SoCs. Say Y here if you want to use the Broadcom PDC.
+
+config ASPEED_LPC_MBOX
+   depends on (ARCH_ASPEED || COMPILE_TEST) && REGMAP && MFD_SYSCON
+   tristate "Aspeed LPC Mailbox Controller"
+   ---help---
+ Provides a driver for the MBOX registers found on Aspeed SOCs
+ (AST2400 and AST2500). This driver provides a device for aspeed
+ mbox registers
 endif
diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile
index 7dde4f609ae8..73165a79b517 100644
--- a/drivers/mailbox/Makefile
+++ b/drivers/mailbox/Makefile
@@ -31,3 +31,5 @@ obj-$(CONFIG_HI6220_MBOX) += hi6220-mailbox.o
 obj-$(CONFIG_BCM_PDC_MBOX) += bcm-pdc-mailbox.o
 
 obj-$(CONFIG_TEGRA_HSP_MBOX)   += tegra-hsp.o
+
+obj-$(CONFIG_ASPEED_LPC_MBOX)  += aspeed-mbox.o
diff --git a/drivers/mailbox/aspeed-mbox.c b/drivers/mailbox/aspeed-mbox.c
new file mode 100644
index ..0f0c711bafee
--- /dev/null
+++ b/drivers/mailbox/aspeed-mbox.c
@@ -0,0 +1,334 @@
+/*
+ * Copyright 2017 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define DEVICE_NAME"aspeed-mbox"
+
+#define ASPEED_MBOX_NUM_REGS 16
+
+#define ASPEED_MBOX_DATA_0 0x00
+#define ASPEED_MBOX_STATUS_0 0x40
+#define ASPEED_MBOX_STATUS_1 0x44
+#define ASPEED_MBOX_BMC_CTRL 0x48
+#define   ASPEED_MBOX_CTRL_RECV BIT(7)
+#define   ASPEED_MBOX_CTRL_MASK BIT(1)
+#define   ASPEED_MBOX_CTRL_SEND BIT(0)
+#define ASPEED_MBOX_HOST_CTRL 0x4c
+#define ASPEED_MBOX_INTERRUPT_0 0x50
+#define ASPEED_MBOX_INTERRUPT_1 0x54
+
+struct aspeed_mbox {
+   struct miscdevice   miscdev;
+   struct regmap   *regmap;
+   unsigned intbase;
+   wait_queue_head_t   queue;
+   struct mutexmutex;
+};
+
+static atomic_t aspeed_mbox_open_count = ATOMIC_INIT(0);
+
+static u8 aspeed_mbox_inb(struct aspeed_mbox *mbox, int reg)
+{
+   /*
+* The mbox registers are actually only one byte but are addressed
+* four bytes apart. The other three bytes are marked 'reserved',
+* they *should* be zero but lets not rely on it.
+* I am going to rely on the fact we can casually read/write to them...
+*/
+   unsigned int val = 0xff; /* If regmap throws an error return 0xff */
+   int rc = regmap_read(mbox->regmap, mbox->base + reg, &val);
+
+   if (rc)
+   dev_err(mbox->miscdev.parent, "regmap_read() failed with "
+   "%d (reg: 0x%08x)\n", rc, reg);
+
+   return val & 0xff;
+}
+
+static void aspeed_mbox_outb(struct aspeed_mbox *mbox, u8 data, int reg)
+{
+   int rc = regmap_write(mbox->regmap, mbox->base + reg, data);
+
+   if (rc)
+   dev_err(mbox->miscdev.parent, "regmap_write() failed with "
+   "%d (data: %u reg: 0x%08x)\n", rc, data, reg);
+}
+
+static struct aspeed_mbox *file_mbox(struct file *file)
+{
+   return container_of(file->private_data, struct aspeed_mbox, miscdev);
+}
+
+static int aspeed_mbox_open(struct inode *inode, struct file *file)
+{
+   struct aspeed_mbox *mbox = file_mbox(file);
+
+   if (atomic_inc_return(&aspeed_mbox_open_count) == 1) {
+   /*
+* Clear the interrupt status bit if it was left on and unmask
+* interrupts.
+* ASPEED_MBOX_CTRL_RECV bit is W1C, this also unmasks in 1 step
+*/
+   aspeed_mbox_outb(mbox, ASPEED_MBOX_CTRL_RECV, 
ASPEED_MBOX_BMC_CTRL);
+   return 0;
+   }
+
+   atomic_dec(&am

[PATCH v4] drivers/misc: Add Aspeed LPC control driver

2017-02-07 Thread Cyril Bur
In order to manage server systems, there is typically another processor
known as a BMC (Baseboard Management Controller) which is responsible
for powering the server and other various elements, sometimes fans,
often the system flash.

The Aspeed BMC family which is what is used on OpenPOWER machines and a
number of x86 as well is typically connected to the host via an LPC
(Low Pin Count) bus (among others).

The LPC bus is an ISA bus on steroids. It's generally used by the
BMC chip to provide the host with access to the system flash (via MEM/FW
cycles) that contains the BIOS or other host firmware along with a
number of SuperIO-style IOs (via IO space) such as UARTs, IPMI
controllers.

On the BMC chip side, this is all configured via a bunch of registers
whose content is related to a given policy of what devices are exposed
at a per system level, which is system/vendor specific, so we don't want
to bolt that into the BMC kernel. This started with a need to provide
something nicer than /dev/mem for user space to configure these things.

One important aspect of the configuration is how the MEM/FW space is
exposed to the host (ie, the x86 or POWER). Some registers in that
bridge can define a window remapping all or portion of the LPC MEM/FW
space to a portion of the BMC internal bus, with no specific limits
imposed in HW.

I think it makes sense to ensure that this window is configured by a
kernel driver that can apply some serious sanity checks on what it is
configured to map.

In practice, user space wants to control this by flipping the mapping
between essentially two types of portions of the BMC address space:

   - The flash space. This is a region of the BMC MMIO space that
more/less directly maps the system flash (at least for reads, writes
are somewhat more complicated).

   - One (or more) reserved area(s) of the BMC physical memory.

The latter is needed for a number of things, such as avoiding letting
the host manipulate the innards of the BMC flash controller via some
evil backdoor, we want to do flash updates by routing the window to a
portion of memory (under control of a mailbox protocol via some
separate set of registers) which the host can use to write new data in
bulk and then request the BMC to flash it. There are other uses, such
as allowing the host to boot from an in-memory flash image rather than
the one in flash (very handy for continuous integration and test, the
BMC can just download new images).

It is important to note that due to the way the Aspeed chip lets the
kernel configure the mapping between host LPC addresses and BMC ram
addresses the offset within the window must be a multiple of size.
Not doing so will fragment the accessible space rather than simply
moving 'zero' upwards. This is caused by the nature of HICR8 being a
mask and the way host LPC addresses are translated.

Signed-off-by: Cyril Bur 
---
v2:
Removed unused functions
Removed use of access_ok()
All input is evil
Reworked the interface as per Benjamin Herrenschmidts vision
V3:
Removed 'default y' from Kconfig
Reordered ioctl() struct fields
Reworeded some comments
V4:
Reorder ioctl() struct fields (again)

 drivers/misc/Kconfig |   8 ++
 drivers/misc/Makefile|   1 +
 drivers/misc/aspeed-lpc-ctrl.c   | 263 +++
 include/uapi/linux/aspeed-lpc-ctrl.h |  36 +
 4 files changed, 308 insertions(+)
 create mode 100644 drivers/misc/aspeed-lpc-ctrl.c
 create mode 100644 include/uapi/linux/aspeed-lpc-ctrl.h

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 64971baf11fa..7dc4c369012f 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -766,6 +766,14 @@ config PANEL_BOOT_MESSAGE
  An empty message will only clear the display at driver init time. Any 
other
  printf()-formatted message is valid with newline and escape codes.
 
+config ASPEED_LPC_CTRL
+   depends on (ARCH_ASPEED || COMPILE_TEST) && REGMAP && MFD_SYSCON
+   tristate "Aspeed ast2400/2500 HOST LPC to BMC bridge control"
+   ---help---
+ Control Aspeed ast2400/2500 HOST LPC to BMC mappings through
+ ioctl()s, the driver also provides a read/write interface to a BMC ram
+ region where the host LPC read/write region can be buffered.
+
 source "drivers/misc/c2port/Kconfig"
 source "drivers/misc/eeprom/Kconfig"
 source "drivers/misc/cb710/Kconfig"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 31983366090a..de1925a9c80b 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -53,6 +53,7 @@ obj-$(CONFIG_ECHO)+= echo/
 obj-$(CONFIG_VEXPRESS_SYSCFG)  += vexpress-syscfg.o
 obj-$(CONFIG_CXL_BASE) += cxl/
 obj-$(CONFIG_PANEL) += panel.o
+obj-$(CONFIG_ASPEED_LPC_CTRL)  += aspeed-lpc-ctrl.o
 
 lkdtm-$(CONFIG_LKDTM)

Re: [PATCH V10 02/28] powerpc, process: Add the function flush_tmregs_to_thread

2016-03-01 Thread Cyril Bur
On Tue, 16 Feb 2016 14:29:32 +0530
Anshuman Khandual  wrote:

> This patch creates a function flush_tmregs_to_thread which
> will then be used by subsequent patches in this series. The
> function checks for self tracing ptrace interface attempts
> while in the TM context and logs appropriate warning message.
> 

Hi Anshuman,

You'll have to bare with me, my ptrace knowledge is non existent so you might
have to walk me though some aspects.

I have been playing with FPU/VMX and VSX saving so I thought I'd take a look.

> Signed-off-by: Anshuman Khandual 
> ---
>  arch/powerpc/include/asm/switch_to.h |  8 
>  arch/powerpc/kernel/process.c| 20 
>  2 files changed, 28 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/switch_to.h 
> b/arch/powerpc/include/asm/switch_to.h
> index 5b268b6..7b297bf 100644
> --- a/arch/powerpc/include/asm/switch_to.h
> +++ b/arch/powerpc/include/asm/switch_to.h
> @@ -70,6 +70,14 @@ static inline void disable_kernel_spe(void)
>  }
>  #endif
>  
> +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> +extern void flush_tmregs_to_thread(struct task_struct *);
> +#else
> +static inline void flush_tmregs_to_thread(struct task_struct *t)
> +{
> +}
> +#endif
> +
>  static inline void clear_task_ebb(struct task_struct *t)
>  {
>  #ifdef CONFIG_PPC_BOOK3S_64
> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> index dccc87e..2c4fa7f 100644
> --- a/arch/powerpc/kernel/process.c
> +++ b/arch/powerpc/kernel/process.c
> @@ -918,6 +918,26 @@ static inline void restore_sprs(struct thread_struct 
> *old_thread,
>  #endif
>  }
>  



> +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> +void flush_tmregs_to_thread(struct task_struct *tsk)
> +{
> + /*
> +  * Process self tracing is not yet supported through
> +  * ptrace interface. Ptrace generic code should have
> +  * prevented this from happening in the first place.
> +  * Warn once here with the message, if some how it
> +  * is attempted.
> +  */
> + WARN_ONCE(tsk == current,
> + "Not expecting ptrace on self: TM regs may be incorrect\n");
> +
> + /*
> +  * If task is not current, it should have been flushed
> +  * already to it's thread_struct during __switch_to().
> +  */

I totally agree except this highlights something that I notice in subsequent
patches, and existing code. All the *_{get,set}() functions call
flush_*_to_thread() when, as per your comment (and my understanding of task
switching) there really shouldn't be a need to do that. My only thought is that
this could be a relic of uniprocessor days when it would have been necessary but
Anton recently stripped that out. Are you able to shed some light here?

The reason I ask is that if the flush_*_to_thread() calls ARE actually
important then I worry that this function is inadequate...

> +}
> +#endif
> +
>  struct task_struct *__switch_to(struct task_struct *prev,
>   struct task_struct *new)
>  {



Re: [PATCH V10 17/28] selftests, powerpc: Add ptrace tests for EBB

2016-03-01 Thread Cyril Bur
On Tue, 16 Feb 2016 14:29:47 +0530
Anshuman Khandual  wrote:

> This patch adds ptrace interface test for EBB specific
> registers. This also adds some generic ptrace interface
> based helper functions to be used by other patches later
> on in the series.
> 
> Signed-off-by: Anshuman Khandual 
> ---
>  tools/testing/selftests/powerpc/Makefile   |   3 +-
>  tools/testing/selftests/powerpc/ptrace/Makefile|   7 +
>  .../testing/selftests/powerpc/ptrace/ptrace-ebb.c  | 150 ++
>  .../testing/selftests/powerpc/ptrace/ptrace-ebb.h  | 103 ++
>  tools/testing/selftests/powerpc/ptrace/ptrace.h| 225 
> +
>  5 files changed, 487 insertions(+), 1 deletion(-)
>  create mode 100644 tools/testing/selftests/powerpc/ptrace/Makefile
>  create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-ebb.c
>  create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-ebb.h
>  create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace.h
> 
> diff --git a/tools/testing/selftests/powerpc/Makefile 
> b/tools/testing/selftests/powerpc/Makefile
> index 0c2706b..5b3c62c 100644
> --- a/tools/testing/selftests/powerpc/Makefile
> +++ b/tools/testing/selftests/powerpc/Makefile
> @@ -22,7 +22,8 @@ SUB_DIRS = benchmarks   \
>  switch_endian\
>  syscalls \
>  tm   \
> -vphn
> +vphn \
> +ptrace
>  
>  endif
>  
> diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile 
> b/tools/testing/selftests/powerpc/ptrace/Makefile
> new file mode 100644
> index 000..8666ac0
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/ptrace/Makefile
> @@ -0,0 +1,7 @@
> +TEST_PROGS := ptrace-ebb
> +all: $(TEST_PROGS)
> +
> +$(TEST_PROGS): ../harness.c ptrace.S ../utils.c
> +ptrace-ebb: ../pmu/event.c ../pmu/lib.c ../pmu/ebb/ebb_handler.S 
> ../pmu/ebb/busy_loop.S
> +clean:
> + rm -f $(TEST_PROGS) *.o

If you:
include ../../lib.mk

Then this Makefile just works with run_tests and install targets from the
base Makefile

> diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-ebb.c 
> b/tools/testing/selftests/powerpc/ptrace/ptrace-ebb.c
> new file mode 100644
> index 000..e1ca608
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-ebb.c
> @@ -0,0 +1,150 @@
> +/*
> + * Ptrace interface test for EBB
> + *
> + * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +#include "../pmu/ebb/ebb.h"
> +#include "ptrace.h"
> +#include "ptrace-ebb.h"
> +

Check out tools/testing/selftests/powerpc/utils.h which provides a nice (and
super hacky) FAIL_IF() macro.

> +void ebb(void)
> +{
> + struct event event;
> +
> + event_init_named(&event, 0x1001e, "cycles");
> + event.attr.config |= (1ull << 63);
> +event.attr.exclusive = 1;
> +event.attr.pinned = 1;
> + event.attr.exclude_kernel = 1;
> + event.attr.exclude_hv = 1;
> + event.attr.exclude_idle = 1;
> +
> + if (event_open(&event)) {
> + perror("event_open() failed");
> + exit(1);
> + }
> +
> + setup_ebb_handler(standard_ebb_callee);
> + mtspr(SPRN_BESCR, 0x8001ull);
> +
> + mb();
> +
> + if (ebb_event_enable(&event)) {
> + perror("ebb_event_handler() failed");
> + exit(1);
> + }
> +
> + mtspr(SPRN_PMC1, pmc_sample_period(SAMPLE_PERIOD));
> + while(1)
> + core_busy_loop();
> + exit(0);
> +}
> +
> +int validate_ebb(struct ebb_regs *regs)
> +{
> + #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
> + struct opd *opd = (struct opd *) ebb_handler;
> + #endif
> +
> + printf("EBBRR: %lx\n", regs->ebbrr);
> + printf("EBBHR: %lx\n", regs->ebbhr);
> + printf("BESCR: %lx\n", regs->bescr);
> + printf("SIAR:  %lx\n", regs->siar);
> + printf("SDAR:  %lx\n", regs->sdar);
> + printf("SIER:  %lx\n", regs->sier);
> + printf("MMCR2: %lx\n", regs->mmcr2);
> + printf("MMCR0: %lx\n", regs->mmcr0);
> +
> + /* Validate EBBHR */
> + #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
> + if (regs->ebbhr != opd->entry)
> + return TEST_FAIL;
> + #else
> + if (regs->ebbhr != (unsigned long) ebb_handler)
> + return TEST_FAIL;
> + #endif
> +
> + /* Validate SIER */
> + if (regs->sier != SIER_EXP)
> + return TEST_FAIL;
> +
> + /* Validate MMCR2 */
> + if (regs->mmcr2 != MMCR2_EXP)
> + return TEST_FAIL;
> +
> + /* Validate MMCR0 */
> + if (regs->mmcr0 != MMCR0_EXP)
> + return TEST_FAIL;
> +
> + return TEST_PASS;
> +}
> +
> +int trace_ebb(pid_t child)
> +{
> + struct ebb_r

Re: [PATCH V10 18/28] selftests, powerpc: Add ptrace tests for GPR/FPR registers

2016-03-01 Thread Cyril Bur
On Tue, 16 Feb 2016 14:29:48 +0530
Anshuman Khandual  wrote:

> This patch adds ptrace interface test for GPR/FPR registers.
> This adds ptrace interface based helper functions related to
> GPR/FPR access and some assembly helper functions related to
> GPR/FPR registers.
> 

I wonder if with a bit of thought we could share our assembly helper
functions... https://patchwork.ozlabs.org/patch/589703/

Let me know, happy to meet in the middle and augment my series if needed.

Thanks,

Cyril

> Signed-off-by: Anshuman Khandual 
> ---
>  tools/testing/selftests/powerpc/ptrace/Makefile|   3 +-
>  .../testing/selftests/powerpc/ptrace/ptrace-gpr.c  | 191 +++
>  .../testing/selftests/powerpc/ptrace/ptrace-gpr.h  |  73 
>  tools/testing/selftests/powerpc/ptrace/ptrace.S| 131 +
>  tools/testing/selftests/powerpc/ptrace/ptrace.h| 208 
> +
>  5 files changed, 605 insertions(+), 1 deletion(-)
>  create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c
>  create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h
>  create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace.S
> 
> diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile 
> b/tools/testing/selftests/powerpc/ptrace/Makefile
> index 8666ac0..f5f62d0 100644
> --- a/tools/testing/selftests/powerpc/ptrace/Makefile
> +++ b/tools/testing/selftests/powerpc/ptrace/Makefile
> @@ -1,4 +1,5 @@
> -TEST_PROGS := ptrace-ebb
> +TEST_PROGS := ptrace-ebb ptrace-gpr
> +
>  all: $(TEST_PROGS)
>  
>  $(TEST_PROGS): ../harness.c ptrace.S ../utils.c
> diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c 
> b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c
> new file mode 100644
> index 000..f84f0e4
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c
> @@ -0,0 +1,191 @@
> +/*
> + * Ptrace test for GPR/FPR registers
> + *
> + * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +#include "ptrace.h"
> +#include "ptrace-gpr.h"
> +
> +/* Tracer and Tracee Shared Data */
> +int shm_id;
> +volatile int *cptr, *pptr;
> +
> +extern void store_gpr(unsigned long *addr);
> +extern void store_fpr(float *addr);
> +
> +float a = FPR_1;
> +float b = FPR_2;
> +float c = FPR_3;
> +
> +void gpr(void)
> +{
> + unsigned long gpr_buf[18];
> + float fpr_buf[32];
> +
> + cptr = (int *)shmat(shm_id, NULL, 0);
> +
> + asm __volatile__(
> + "li 14, %[gpr_1];"
> + "li 15, %[gpr_1];"
> + "li 16, %[gpr_1];"
> + "li 17, %[gpr_1];"
> + "li 18, %[gpr_1];"
> + "li 19, %[gpr_1];"
> + "li 20, %[gpr_1];"
> + "li 21, %[gpr_1];"
> + "li 22, %[gpr_1];"
> + "li 23, %[gpr_1];"
> + "li 24, %[gpr_1];"
> + "li 25, %[gpr_1];"
> + "li 26, %[gpr_1];"
> + "li 27, %[gpr_1];"
> + "li 28, %[gpr_1];"
> + "li 29, %[gpr_1];"
> + "li 30, %[gpr_1];"
> + "li 31, %[gpr_1];"
> +
> + "lfs 0, 0(%[flt_1]);"
> + "lfs 1, 0(%[flt_1]);"
> + "lfs 2, 0(%[flt_1]);"
> + "lfs 3, 0(%[flt_1]);"
> + "lfs 4, 0(%[flt_1]);"
> + "lfs 5, 0(%[flt_1]);"
> + "lfs 6, 0(%[flt_1]);"
> + "lfs 7, 0(%[flt_1]);"
> + "lfs 8, 0(%[flt_1]);"
> + "lfs 9, 0(%[flt_1]);"
> + "lfs 10, 0(%[flt_1]);"
> + "lfs 11, 0(%[flt_1]);"
> + "lfs 12, 0(%[flt_1]);"
> + "lfs 13, 0(%[flt_1]);"
> + "lfs 14, 0(%[flt_1]);"
> + "lfs 15, 0(%[flt_1]);"
> + "lfs 16, 0(%[flt_1]);"
> + "lfs 17, 0(%[flt_1]);"
> + "lfs 18, 0(%[flt_1]);"
> + "lfs 19, 0(%[flt_1]);"
> + "lfs 20, 0(%[flt_1]);"
> + "lfs 21, 0(%[flt_1]);"
> + "lfs 22, 0(%[flt_1]);"
> + "lfs 23, 0(%[flt_1]);"
> + "lfs 24, 0(%[flt_1]);"
> + "lfs 25, 0(%[flt_1]);"
> + "lfs 26, 0(%[flt_1]);"
> + "lfs 27, 0(%[flt_1]);"
> + "lfs 28, 0(%[flt_1]);"
> + "lfs 29, 0(%[flt_1]);"
> + "lfs 30, 0(%[flt_1]);"
> + "lfs 31, 0(%[flt_1]);"
> +
> + :
> + :[gpr_1]"i"(GPR_1), [flt_1] "r" (&a)
> + : "memory", "r6", "r7", "r8", "r9", "r10",
> + "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", 
> "r20",
> + "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "r29", 
> "r30", "r31"
> + );
> +
> + while(!cptr[0]);
> +
> + store_gpr(gpr_buf);
> + 

Re: [PATCH V10 02/28] powerpc, process: Add the function flush_tmregs_to_thread

2016-03-01 Thread Cyril Bur
On Wed, 02 Mar 2016 09:59:06 +0530
Anshuman Khandual  wrote:

> On 03/02/2016 05:45 AM, Cyril Bur wrote:
> > On Tue, 16 Feb 2016 14:29:32 +0530
> > Anshuman Khandual  wrote:
> >   
> >> This patch creates a function flush_tmregs_to_thread which
> >> will then be used by subsequent patches in this series. The
> >> function checks for self tracing ptrace interface attempts
> >> while in the TM context and logs appropriate warning message.
> >>  
> > 
> > Hi Anshuman,
> > 
> > You'll have to bare with me, my ptrace knowledge is non existent so you 
> > might
> > have to walk me though some aspects.
> > 
> > I have been playing with FPU/VMX and VSX saving so I thought I'd take a 
> > look.  
> 
> Sure.
> 
> 
> >   
> >> Signed-off-by: Anshuman Khandual 
> >> ---
> >>  arch/powerpc/include/asm/switch_to.h |  8 
> >>  arch/powerpc/kernel/process.c| 20 
> >>  2 files changed, 28 insertions(+)
> >>
> >> diff --git a/arch/powerpc/include/asm/switch_to.h 
> >> b/arch/powerpc/include/asm/switch_to.h
> >> index 5b268b6..7b297bf 100644
> >> --- a/arch/powerpc/include/asm/switch_to.h
> >> +++ b/arch/powerpc/include/asm/switch_to.h
> >> @@ -70,6 +70,14 @@ static inline void disable_kernel_spe(void)
> >>  }
> >>  #endif
> >>  
> >> +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> >> +extern void flush_tmregs_to_thread(struct task_struct *);
> >> +#else
> >> +static inline void flush_tmregs_to_thread(struct task_struct *t)
> >> +{
> >> +}
> >> +#endif
> >> +
> >>  static inline void clear_task_ebb(struct task_struct *t)
> >>  {
> >>  #ifdef CONFIG_PPC_BOOK3S_64
> >> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> >> index dccc87e..2c4fa7f 100644
> >> --- a/arch/powerpc/kernel/process.c
> >> +++ b/arch/powerpc/kernel/process.c
> >> @@ -918,6 +918,26 @@ static inline void restore_sprs(struct thread_struct 
> >> *old_thread,
> >>  #endif
> >>  }
> >>
> > 
> > 
> >   
> >> +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> >> +void flush_tmregs_to_thread(struct task_struct *tsk)
> >> +{
> >> +  /*
> >> +   * Process self tracing is not yet supported through
> >> +   * ptrace interface. Ptrace generic code should have
> >> +   * prevented this from happening in the first place.
> >> +   * Warn once here with the message, if some how it
> >> +   * is attempted.
> >> +   */
> >> +  WARN_ONCE(tsk == current,
> >> +  "Not expecting ptrace on self: TM regs may be incorrect\n");
> >> +
> >> +  /*
> >> +   * If task is not current, it should have been flushed
> >> +   * already to it's thread_struct during __switch_to().
> >> +   */  
> > 
> > I totally agree except this highlights something that I notice in subsequent
> > patches, and existing code. All the *_{get,set}() functions call
> > flush_*_to_thread() when, as per your comment (and my understanding of task
> > switching) there really shouldn't be a need to do that. My only thought is 
> > that
> > this could be a relic of uniprocessor days when it would have been 
> > necessary but
> > Anton recently stripped that out. Are you able to shed some light here?  
> 
> Its been sometime I had looked into this aspect of the series. I remember
> Michael Neuling and myself discussed about this and settled on a single
> WARN_ON here as nothing else was required to be done in the function. It
> may be possible that all the flush_*_to_thread() functions used else where
> are because of uniprocessor concerns. I dont understand completely our
> context save/restore paths including the lazy ones. I believed that these
> flush_*_to_thread() routines just made sure task struct has the latest
> values of the thread context in case of some complicated save/restore
> paths might not have done the complete save at that point in time.
> 

Well as you note in the comment though, it should be done since we've gone
through __switch_to()...

> If you think that all these flush_*_to_thread() functions used through
> out POWER ptrace need review to see whether they are required or not
> anymore I would suggest we should do it as a separate patch after this
> series and I am willing to work with you on that.

I THINK your patches are correct and we're just performing needless
flush_*_to_thread() calls now in which case its fine and we can review laster,
my concern is that I've been wrong before so having flush_tmregs_to_thread() do
nothing worries me. I wonder if Mr Neuling or Mr Ellerman have anything to say
on the subject...

> 
> > 
> > The reason I ask is that if the flush_*_to_thread() calls ARE actually
> > important then I worry that this function is inadequate...  
> 
> I guess we went through that and finally settled on WARN_ON once but dont
> remember the exact context now. Will look into all previous discussions
> on this and get back.

Thanks,

Cyril
> 



Re: [PATCH V5] powerpc/powernv : Add support for OPAL-OCC command/response interface

2017-06-26 Thread Cyril Bur
On Mon, 2017-06-26 at 11:02 +0530, Shilpasri G Bhat wrote:
> In P9, OCC (On-Chip-Controller) supports shared memory based
> commad-response interface. Within the shared memory there is an OPAL
> command buffer and OCC response buffer that can be used to send
> inband commands to OCC. This patch adds a platform driver to support
> the command/response interface between OCC and the host.
> 

I feel obliged to check that an occ request_id can be zero, I'm sure
its fine - just have to ask, zero is so often special.

I should also point out I don't know must about how the OCC works or
anything so I would be best if there were other eyes on this.

Provided zero is an ok request_id:
Reviewed-by: Cyril Bur 

> Signed-off-by: Shilpasri G Bhat 
> ---
> The skiboot patch for the interface is posted here:
> https://lists.ozlabs.org/pipermail/skiboot/2017-June/007960.html
> 
> Changes from V4:
> - Add token as a parameter to the opal_occ_command()
> - Use per-occ counter for command request_id instead of using async
>   token.
> 
>  arch/powerpc/include/asm/opal-api.h|  41 +++-
>  arch/powerpc/include/asm/opal.h|   3 +
>  arch/powerpc/platforms/powernv/Makefile|   2 +-
>  arch/powerpc/platforms/powernv/opal-occ.c  | 303 
> +
>  arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
>  arch/powerpc/platforms/powernv/opal.c  |   8 +
>  6 files changed, 356 insertions(+), 2 deletions(-)
>  create mode 100644 arch/powerpc/platforms/powernv/opal-occ.c
> 
> diff --git a/arch/powerpc/include/asm/opal-api.h 
> b/arch/powerpc/include/asm/opal-api.h
> index cb3e624..011d86c 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -42,6 +42,10 @@
>  #define OPAL_I2C_STOP_ERR-24
>  #define OPAL_XIVE_PROVISIONING   -31
>  #define OPAL_XIVE_FREE_ACTIVE-32
> +#define OPAL_OCC_INVALID_STATE   -33
> +#define OPAL_OCC_BUSY-34
> +#define OPAL_OCC_CMD_TIMEOUT -35
> +#define OPAL_OCC_RSP_MISMATCH-36
>  
>  /* API Tokens (in r0) */
>  #define OPAL_INVALID_CALL   -1
> @@ -190,7 +194,8 @@
>  #define OPAL_NPU_INIT_CONTEXT146
>  #define OPAL_NPU_DESTROY_CONTEXT 147
>  #define OPAL_NPU_MAP_LPAR148
> -#define OPAL_LAST148
> +#define OPAL_OCC_COMMAND 149
> +#define OPAL_LAST149
>  
>  /* Device tree flags */
>  
> @@ -829,6 +834,40 @@ struct opal_prd_msg_header {
>  
>  struct opal_prd_msg;
>  
> +enum occ_cmd {
> + OCC_CMD_AMESTER_PASS_THRU = 0,
> + OCC_CMD_CLEAR_SENSOR_DATA,
> + OCC_CMD_SET_POWER_CAP,
> + OCC_CMD_SET_POWER_SHIFTING_RATIO,
> + OCC_CMD_SELECT_SENSOR_GROUPS,
> + OCC_CMD_LAST
> +};
> +
> +struct opal_occ_cmd_rsp_msg {
> + __be64 cdata;
> + __be64 rdata;
> + __be16 cdata_size;
> + __be16 rdata_size;
> + u8 cmd;
> + u8 request_id;
> + u8 status;
> +};
> +
> +struct opal_occ_cmd_data {
> + __be16 size;
> + u8 cmd;
> + u8 data[];
> +};
> +
> +struct opal_occ_rsp_data {
> + __be16 size;
> + u8 status;
> + u8 data[];
> +};
> +
> +#define MAX_OPAL_CMD_DATA_LENGTH4090
> +#define MAX_OCC_RSP_DATA_LENGTH 8698
> +
>  #define OCC_RESET   0
>  #define OCC_LOAD1
>  #define OCC_THROTTLE2
> diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
> index 03ed493..84659bd 100644
> --- a/arch/powerpc/include/asm/opal.h
> +++ b/arch/powerpc/include/asm/opal.h
> @@ -346,6 +346,9 @@ static inline int opal_get_async_rc(struct opal_msg msg)
>  
>  void opal_wake_poller(void);
>  
> +int64_t opal_occ_command(int chip_id, struct opal_occ_cmd_rsp_msg *msg,
> +  int token, bool retry);
> +
>  #endif /* __ASSEMBLY__ */
>  
>  #endif /* _ASM_POWERPC_OPAL_H */
> diff --git a/arch/powerpc/platforms/powernv/Makefile 
> b/arch/powerpc/platforms/powernv/Makefile
> index b5d98cb..f5f0902 100644
> --- a/arch/powerpc/platforms/powernv/Makefile
> +++ b/arch/powerpc/platforms/powernv/Makefile
> @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o 
> opal-async.o idle.o
>  obj-y+= opal-rtc.o opal-nvram.o opal-lpc.o 
> opal-flash.o
>  obj-y+= rng.o opal-elog.o opal-dump.o 
> opal-sysparam.o opal-sensor.o
>  obj-y+= opal-msglog.o opal-hmi.o opal-power.o 
> opal-irqchip.o
> -obj-y+= opal-kmsg.

Re: [PATCH v2 2/3] misc: aspeed-lpc: Request and enable LPC clock

2018-02-19 Thread Cyril Bur
On Mon, 2018-02-19 at 17:54 +1030, Joel Stanley wrote:
> The LPC device needs to ensure it's clock is enabled before it can do
> anything.
> 
> In the past the clock was enabled and left running by u-boot, however
> Linux now has an upstream clock driver that disables unused clocks.
> 
> Tested-by: Lei YU 
> Reviewed-by: Andrew Jeffery 
> Signed-off-by: Joel Stanley 

Reviewed-by: Cyril Bur 

> ---
>  drivers/misc/aspeed-lpc-ctrl.c | 26 +++---
>  1 file changed, 23 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/misc/aspeed-lpc-ctrl.c b/drivers/misc/aspeed-lpc-ctrl.c
> index b5439643f54b..1827b7aa6674 100644
> --- a/drivers/misc/aspeed-lpc-ctrl.c
> +++ b/drivers/misc/aspeed-lpc-ctrl.c
> @@ -7,6 +7,7 @@
>   * 2 of the License, or (at your option) any later version.
>   */
>  
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -26,6 +27,7 @@
>  struct aspeed_lpc_ctrl {
>   struct miscdevice   miscdev;
>   struct regmap   *regmap;
> + struct clk  *clk;
>   phys_addr_t mem_base;
>   resource_size_t mem_size;
>   u32 pnor_size;
> @@ -221,16 +223,33 @@ static int aspeed_lpc_ctrl_probe(struct platform_device 
> *pdev)
>   return -ENODEV;
>   }
>  
> + lpc_ctrl->clk = devm_clk_get(dev, NULL);
> + if (IS_ERR(lpc_ctrl->clk)) {
> + dev_err(dev, "couldn't get clock\n");
> + return PTR_ERR(lpc_ctrl->clk);
> + }
> + rc = clk_prepare_enable(lpc_ctrl->clk);
> + if (rc) {
> + dev_err(dev, "couldn't enable clock\n");
> + return rc;
> + }
> +
>   lpc_ctrl->miscdev.minor = MISC_DYNAMIC_MINOR;
>   lpc_ctrl->miscdev.name = DEVICE_NAME;
>   lpc_ctrl->miscdev.fops = &aspeed_lpc_ctrl_fops;
>   lpc_ctrl->miscdev.parent = dev;
>   rc = misc_register(&lpc_ctrl->miscdev);
> - if (rc)
> + if (rc) {
>   dev_err(dev, "Unable to register device\n");
> - else
> - dev_info(dev, "Loaded at %pr\n", &resm);
> + goto err;
> + }
> +
> + dev_info(dev, "Loaded at %pr\n", &resm);
> +
> + return 0;
>  
> +err:
> + clk_disable_unprepare(lpc_ctrl->clk);
>   return rc;
>  }
>  
> @@ -239,6 +258,7 @@ static int aspeed_lpc_ctrl_remove(struct platform_device 
> *pdev)
>   struct aspeed_lpc_ctrl *lpc_ctrl = dev_get_drvdata(&pdev->dev);
>  
>   misc_deregister(&lpc_ctrl->miscdev);
> + clk_disable_unprepare(lpc_ctrl->clk);
>  
>   return 0;
>  }


Re: [PATCH v2 3/3] misc: aspeed-lpc-ctrl: Enable FWH and A2H bridge cycles

2018-02-19 Thread Cyril Bur
On Mon, 2018-02-19 at 17:54 +1030, Joel Stanley wrote:
> To date this driver has relied on prevous state from out of tree hacks
> and vendor u-boot trees in order to have the host be able to access
> data over the LPC bus.
> 
> Now we explicitly enable the AHB to LPC bridge and FWH cycles from when
> the user first configures the address to map. We chose to do this then
> as before that time there is no way for the kernel to know where it is
> safe to point the LPC window.
> 
> Tested-by: Lei YU 
> Reviewed-by: Andrew Jeffery 
> Signed-off-by: Joel Stanley 

Reviewed-by: Cyril Bur 

> ---
>  drivers/misc/aspeed-lpc-ctrl.c | 18 --
>  1 file changed, 16 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/misc/aspeed-lpc-ctrl.c b/drivers/misc/aspeed-lpc-ctrl.c
> index 1827b7aa6674..a024f8042259 100644
> --- a/drivers/misc/aspeed-lpc-ctrl.c
> +++ b/drivers/misc/aspeed-lpc-ctrl.c
> @@ -21,6 +21,10 @@
>  
>  #define DEVICE_NAME  "aspeed-lpc-ctrl"
>  
> +#define HICR5 0x0
> +#define HICR5_ENL2H  BIT(8)
> +#define HICR5_ENFWH  BIT(10)
> +
>  #define HICR7 0x8
>  #define HICR8 0xc
>  
> @@ -155,8 +159,18 @@ static long aspeed_lpc_ctrl_ioctl(struct file *file, 
> unsigned int cmd,
>   if (rc)
>   return rc;
>  
> - return regmap_write(lpc_ctrl->regmap, HICR8,
> - (~(map.size - 1)) | ((map.size >> 16) - 1));
> + rc = regmap_write(lpc_ctrl->regmap, HICR8,
> + (~(map.size - 1)) | ((map.size >> 16) - 1));
> + if (rc)
> + return rc;
> +
> + /*
> +  * Enable LPC FHW cycles. This is required for the host to
> +  * access the regions specified.
> +  */
> + return regmap_update_bits(lpc_ctrl->regmap, HICR5,
> + HICR5_ENFWH | HICR5_ENL2H,
> + HICR5_ENFWH | HICR5_ENL2H);
>   }
>  
>   return -EINVAL;


Re: [PATCH v2] drivers/mailbox: Add Aspeed mailbox driver

2017-09-04 Thread Cyril Bur
Hi,

I haven't heard anything about this driver. I'm trying to interpret if
the silence is because there is something fundamentally wrong with the
driver or is it because it doesn't use any of the mailbox
infrastructure it is being ignored.

Either way I'm happy to address the problems - in the event of there
being something wrong, I'm more than happy to take feedback and address
problems. If you would prefer to see it moved somewhere into char/misc,
this is also something I can do. Moving into char/misc is my preferred
option, I had a look at the mailbox infrastructure and it seems a bit
heavyweight for what this ultimately very simple driver is doing.

If you would be so kind as to let me know which direction to take.

Thanks,

Cyril

On Wed, 2017-02-08 at 10:36 +1100, Cyril Bur wrote:
> This provides access to the mbox registers on the ast2400 and ast2500
> SoCs.
> 
> This driver allows arbitrary reads and writes to the 16 data registers as
> the other end may have configured the mbox hardware to provide an
> interrupt when a specific register gets written to.
> 
> Signed-off-by: Cyril Bur 
> ---
> V2:
>s/ASpeed/Aspeed/
>Reword Kconfig options
>Use tristate for config symbol
> 
>  drivers/mailbox/Kconfig   |   8 +
>  drivers/mailbox/Makefile  |   2 +
>  drivers/mailbox/aspeed-mbox.c | 334 
> ++
>  3 files changed, 344 insertions(+)
>  create mode 100644 drivers/mailbox/aspeed-mbox.c
> 
> diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig
> index ceff415f201c..e24044d5c219 100644
> --- a/drivers/mailbox/Kconfig
> +++ b/drivers/mailbox/Kconfig
> @@ -152,4 +152,12 @@ config BCM_PDC_MBOX
> Mailbox implementation for the Broadcom PDC ring manager,
> which provides access to various offload engines on Broadcom
> SoCs. Say Y here if you want to use the Broadcom PDC.
> +
> +config ASPEED_LPC_MBOX
> + depends on (ARCH_ASPEED || COMPILE_TEST) && REGMAP && MFD_SYSCON
> + tristate "Aspeed LPC Mailbox Controller"
> + ---help---
> +   Provides a driver for the MBOX registers found on Aspeed SOCs
> +   (AST2400 and AST2500). This driver provides a device for aspeed
> +   mbox registers
>  endif
> diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile
> index 7dde4f609ae8..73165a79b517 100644
> --- a/drivers/mailbox/Makefile
> +++ b/drivers/mailbox/Makefile
> @@ -31,3 +31,5 @@ obj-$(CONFIG_HI6220_MBOX)   += hi6220-mailbox.o
>  obj-$(CONFIG_BCM_PDC_MBOX)   += bcm-pdc-mailbox.o
>  
>  obj-$(CONFIG_TEGRA_HSP_MBOX) += tegra-hsp.o
> +
> +obj-$(CONFIG_ASPEED_LPC_MBOX)+= aspeed-mbox.o
> diff --git a/drivers/mailbox/aspeed-mbox.c b/drivers/mailbox/aspeed-mbox.c
> new file mode 100644
> index ..0f0c711bafee
> --- /dev/null
> +++ b/drivers/mailbox/aspeed-mbox.c
> @@ -0,0 +1,334 @@
> +/*
> + * Copyright 2017 IBM Corporation
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#define DEVICE_NAME  "aspeed-mbox"
> +
> +#define ASPEED_MBOX_NUM_REGS 16
> +
> +#define ASPEED_MBOX_DATA_0 0x00
> +#define ASPEED_MBOX_STATUS_0 0x40
> +#define ASPEED_MBOX_STATUS_1 0x44
> +#define ASPEED_MBOX_BMC_CTRL 0x48
> +#define   ASPEED_MBOX_CTRL_RECV BIT(7)
> +#define   ASPEED_MBOX_CTRL_MASK BIT(1)
> +#define   ASPEED_MBOX_CTRL_SEND BIT(0)
> +#define ASPEED_MBOX_HOST_CTRL 0x4c
> +#define ASPEED_MBOX_INTERRUPT_0 0x50
> +#define ASPEED_MBOX_INTERRUPT_1 0x54
> +
> +struct aspeed_mbox {
> + struct miscdevice   miscdev;
> + struct regmap   *regmap;
> + unsigned intbase;
> + wait_queue_head_t   queue;
> + struct mutexmutex;
> +};
> +
> +static atomic_t aspeed_mbox_open_count = ATOMIC_INIT(0);
> +
> +static u8 aspeed_mbox_inb(struct aspeed_mbox *mbox, int reg)
> +{
> + /*
> +  * The mbox registers are actually only one byte but are addressed
> +  * four bytes apart. The other three bytes are marked 'reserved',
> +  * they *should* be zero but lets not rely on it.
> +  * I am going to rely on the fact we can casually read/write to them...
> +  */
> + unsigned int val = 0xff; /* If regmap throws an error return 0xff */
> + int rc = regmap_read(mbox->regmap, mbox->base + reg, &am

Re: [PATCH v2] drivers/mailbox: Add Aspeed mailbox driver

2017-09-04 Thread Cyril Bur
On Mon, 2017-09-04 at 20:13 +0530, Jassi Brar wrote:
> On Mon, Sep 4, 2017 at 12:47 PM, Cyril Bur  wrote:
> > Hi,
> > 
> > I haven't heard anything about this driver. I'm trying to interpret if
> > the silence is because there is something fundamentally wrong with the
> > driver or is it because it doesn't use any of the mailbox
> > infrastructure it is being ignored.
> > 
> 
> Its the latter.

Great! Thanks for your response, I'll resend it under char/misc.

Kind regards,

Cyril


Re: [PATCH v2] drivers/mailbox: Add Aspeed mailbox driver

2017-09-05 Thread Cyril Bur
On Tue, 2017-09-05 at 08:25 +0200, Greg KH wrote:
> On Tue, Sep 05, 2017 at 09:37:19AM +1000, Cyril Bur wrote:
> > On Mon, 2017-09-04 at 20:13 +0530, Jassi Brar wrote:
> > > On Mon, Sep 4, 2017 at 12:47 PM, Cyril Bur  wrote:
> > > > Hi,
> > > > 
> > > > I haven't heard anything about this driver. I'm trying to interpret if
> > > > the silence is because there is something fundamentally wrong with the
> > > > driver or is it because it doesn't use any of the mailbox
> > > > infrastructure it is being ignored.
> > > > 
> > > 
> > > Its the latter.
> > 
> > Great! Thanks for your response, I'll resend it under char/misc.
> 
> Wait, no, please use the mailbox infrastructure for this, making a
> one-off char/misc driver isn't ok when there is a framework for you to
> use.
> 

Thanks very much for your response.

Yes you're absolutely correct, I'll send it to drivers/mailbox. I look
forward to working more closely with Jassi. I'll be sure to fully and
completely exploit all the features of this mailbox framework.

Looking forward to continued development,

Thanks very much to all involved,

Cyril

> thanks,
> 
> greg k-h


[PATCH] checkpatch: Silence perl 5.26.0 unescaped left brace warnings

2017-06-06 Thread Cyril Bur
As of perl 5, version 26, subversion 0 (v5.26.0) some new warnings have
occurred when running checkpatch.

Unescaped left brace in regex is deprecated here (and will be fatal in
Perl 5.30), passed through in regex; marked by <-- HERE in m/^(.\s*){
<-- HERE \s*/ at scripts/checkpatch.pl line 3544.

Unescaped left brace in regex is deprecated here (and will be fatal in
Perl 5.30), passed through in regex; marked by <-- HERE in m/^(.\s*){
<-- HERE \s*/ at scripts/checkpatch.pl line 3885.

Unescaped left brace in regex is deprecated here (and will be fatal in
Perl 5.30), passed through in regex; marked by <-- HERE in
m/^(\+.*(?:do|\))){ <-- HERE / at scripts/checkpatch.pl line 4374.

It seems perfectly reasonable to do as the warning suggests and simply
escape the left brace in these three locations.

Signed-off-by: Cyril Bur 
---
 scripts/checkpatch.pl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 4b9569fa931b..33e50614a5d9 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -3541,7 +3541,7 @@ sub process {
$fixedline =~ s/\s*=\s*$/ = {/;
fix_insert_line($fixlinenr, $fixedline);
$fixedline = $line;
-   $fixedline =~ s/^(.\s*){\s*/$1/;
+   $fixedline =~ s/^(.\s*)\{\s*/$1/;
fix_insert_line($fixlinenr, $fixedline);
}
}
@@ -3882,7 +3882,7 @@ sub process {
my $fixedline = rtrim($prevrawline) . " {";
fix_insert_line($fixlinenr, $fixedline);
$fixedline = $rawline;
-   $fixedline =~ s/^(.\s*){\s*/$1\t/;
+   $fixedline =~ s/^(.\s*)\{\s*/$1\t/;
if ($fixedline !~ /^\+\s*$/) {
fix_insert_line($fixlinenr, $fixedline);
}
@@ -4371,7 +4371,7 @@ sub process {
if (ERROR("SPACING",
  "space required before the open brace '{'\n" 
. $herecurr) &&
$fix) {
-   $fixed[$fixlinenr] =~ s/^(\+.*(?:do|\))){/$1 {/;
+   $fixed[$fixlinenr] =~ s/^(\+.*(?:do|\)))\{/$1 
{/;
}
}
 
-- 
2.13.0



Re: [PATCH V2 2/2] powerpc/powernv : Add support for OPAL-OCC command/response interface

2017-06-13 Thread Cyril Bur
On Tue, 2017-06-13 at 23:26 +0530, Shilpasri G Bhat wrote:
> In P9, OCC (On-Chip-Controller) supports shared memory based
> commad-response interface. Within the shared memory there is an OPAL
> command buffer and OCC response buffer that can be used to send
> inband commands to OCC. This patch adds a platform driver to support
> the command/response interface between OCC and the host.
> 

Hi Shilpasri,

I have another question about printing of errors, see below. Otherwise
looks better.

When you're dropping the locks I wonder why you couldn't just use
atomic_set().

Finally, as you say below, if a user does a read() without having done
a write(), there isn't really any guarantee as to what they get. They
will get the previous response if there had been a write() call, what
about if there had never been a write() call? 
Could this be considered a security problem? Imagine that one program
is using the file descriptor and a malicious program keeps trying to
get the file descriptor and then the program which has the file
descriptor crashes? What information might the malicious program get?

More inline,

Cyril

> Signed-off-by: Shilpasri G Bhat 
> ---
> Changes from V2:
> - Remove spinlock and use atomic_t for setting and clearing flags
> - Fix endian swapping
> - Use pa() and va() before and after opal call for accessing buffer
>   data
> - Replace (u8 *) with __be64 for buffer pointers
> - User reads the previous OCC response if the user does a read()
>   before a write(). Is this wrong?
> - Add WARN_ON check for nr_occs > 254
> 
>  arch/powerpc/include/asm/opal-api.h|  41 +++-
>  arch/powerpc/include/asm/opal.h|   3 +
>  arch/powerpc/platforms/powernv/Makefile|   2 +-
>  arch/powerpc/platforms/powernv/opal-occ.c  | 314 
> +
>  arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
>  arch/powerpc/platforms/powernv/opal.c  |   8 +
>  6 files changed, 367 insertions(+), 2 deletions(-)
>  create mode 100644 arch/powerpc/platforms/powernv/opal-occ.c
> 
> diff --git a/arch/powerpc/include/asm/opal-api.h 
> b/arch/powerpc/include/asm/opal-api.h
> index cb3e624..011d86c 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -42,6 +42,10 @@
>  #define OPAL_I2C_STOP_ERR-24
>  #define OPAL_XIVE_PROVISIONING   -31
>  #define OPAL_XIVE_FREE_ACTIVE-32
> +#define OPAL_OCC_INVALID_STATE   -33
> +#define OPAL_OCC_BUSY-34
> +#define OPAL_OCC_CMD_TIMEOUT -35
> +#define OPAL_OCC_RSP_MISMATCH-36
>  
>  /* API Tokens (in r0) */
>  #define OPAL_INVALID_CALL   -1
> @@ -190,7 +194,8 @@
>  #define OPAL_NPU_INIT_CONTEXT146
>  #define OPAL_NPU_DESTROY_CONTEXT 147
>  #define OPAL_NPU_MAP_LPAR148
> -#define OPAL_LAST148
> +#define OPAL_OCC_COMMAND 149
> +#define OPAL_LAST149
>  
>  /* Device tree flags */
>  
> @@ -829,6 +834,40 @@ struct opal_prd_msg_header {
>  
>  struct opal_prd_msg;
>  
> +enum occ_cmd {
> + OCC_CMD_AMESTER_PASS_THRU = 0,
> + OCC_CMD_CLEAR_SENSOR_DATA,
> + OCC_CMD_SET_POWER_CAP,
> + OCC_CMD_SET_POWER_SHIFTING_RATIO,
> + OCC_CMD_SELECT_SENSOR_GROUPS,
> + OCC_CMD_LAST
> +};
> +
> +struct opal_occ_cmd_rsp_msg {
> + __be64 cdata;
> + __be64 rdata;
> + __be16 cdata_size;
> + __be16 rdata_size;
> + u8 cmd;
> + u8 request_id;
> + u8 status;
> +};
> +
> +struct opal_occ_cmd_data {
> + __be16 size;
> + u8 cmd;
> + u8 data[];
> +};
> +
> +struct opal_occ_rsp_data {
> + __be16 size;
> + u8 status;
> + u8 data[];
> +};
> +
> +#define MAX_OPAL_CMD_DATA_LENGTH4090
> +#define MAX_OCC_RSP_DATA_LENGTH 8698
> +
>  #define OCC_RESET   0
>  #define OCC_LOAD1
>  #define OCC_THROTTLE2
> diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
> index 03ed493..e55ed79 100644
> --- a/arch/powerpc/include/asm/opal.h
> +++ b/arch/powerpc/include/asm/opal.h
> @@ -346,6 +346,9 @@ static inline int opal_get_async_rc(struct opal_msg msg)
>  
>  void opal_wake_poller(void);
>  
> +int64_t opal_occ_command(int chip_id, struct opal_occ_cmd_rsp_msg *msg,
> +  bool retry);
> +
>  #endif /* __ASSEMBLY__ */
>  
>  #endif /* _ASM_POWERPC_OPAL_H */
> diff --git a/arch/powerpc/platforms/powernv/Makefile 
> b/arch/powerpc/platforms/powernv/Makefile
> index b5d98cb..f5f0902 100644
> --- a/arch/powerpc/platforms/powernv/Makefile
> +++ b/arch/powerpc/platforms/powernv/Makefile
> @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o 
> opal-async.o idle.o
>  obj-y+= opal-rtc.o opal-nvram.o opal-lpc.o 
> opal-flash.o
>  obj-y+= rng.o opal-elog.o opal-dump.o 
> opal-sysparam.o opal-sensor.o
>  o

Re: [RFC 2/2] powerpc/powernv : Add support for OPAL-OCC command/response interface

2017-06-07 Thread Cyril Bur
On Mon, 2017-06-05 at 11:43 +0530, Shilpasri G Bhat wrote:
> In P9, OCC (On-Chip-Controller) supports shared memory based
> commad-response interface. Within the shared memory there is an OPAL
> command buffer and OCC response buffer that can be used to send
> inband commands to OCC. This patch adds a platform driver to support
> the command/response interface between OCC and the host.
> 
> Signed-off-by: Shilpasri G Bhat 

Hi Shilpasri,

Is there somewhere interface (for userspace) is defined? My main
concern is: who is doing the endian swapping - I assume once the data
leaves Linux and goes into skiboot everything will be in big endian.
I've looked through your userspace tool in your skiboot patches and I
don't see anywhere where it deals with endian. Linux does some dealing
here but since userspace passes an arbitrary buffer, Linux can't
convert that.

Also I notice that you'll block and wait for the response from skiboot
in the write() call from userspace. Just as a fairly easy optimisation
I would have though it would make more sense to send the async command
to skiboot in the write() but actually block on
opal_async_wait_response() when userspace does the read() to get the
response data. This provides the advantage that skiboot and the occ
might be able to process the request while userspace gets around the
calling read() as opposed to forcing userspace to wait in the write()
call. Having said that, I don't think this path is particularly
performance critical, so this doesn't seem to be a big problem.

A few more questions below.

Cyril

> ---
>  arch/powerpc/include/asm/opal-api.h|  40 ++-
>  arch/powerpc/include/asm/opal.h|   3 +
>  arch/powerpc/platforms/powernv/Makefile|   2 +-
>  arch/powerpc/platforms/powernv/opal-occ.c  | 349 
> +
>  arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
>  arch/powerpc/platforms/powernv/opal.c  |   8 +
>  6 files changed, 401 insertions(+), 2 deletions(-)
>  create mode 100644 arch/powerpc/platforms/powernv/opal-occ.c
> 
> diff --git a/arch/powerpc/include/asm/opal-api.h 
> b/arch/powerpc/include/asm/opal-api.h
> index cb3e624..d75fbb9 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -42,6 +42,10 @@
>  #define OPAL_I2C_STOP_ERR-24
>  #define OPAL_XIVE_PROVISIONING   -31
>  #define OPAL_XIVE_FREE_ACTIVE-32
> +#define OPAL_OCC_INVALID_STATE   -33
> +#define OPAL_OCC_BUSY-34
> +#define OPAL_OCC_CMD_TIMEOUT -35
> +#define OPAL_OCC_RSP_MISMATCH-36
>  
>  /* API Tokens (in r0) */
>  #define OPAL_INVALID_CALL   -1
> @@ -190,7 +194,8 @@
>  #define OPAL_NPU_INIT_CONTEXT146
>  #define OPAL_NPU_DESTROY_CONTEXT 147
>  #define OPAL_NPU_MAP_LPAR148
> -#define OPAL_LAST148
> +#define OPAL_OCC_COMMAND 149
> +#define OPAL_LAST149
>  
>  /* Device tree flags */
>  
> @@ -829,6 +834,39 @@ struct opal_prd_msg_header {
>  
>  struct opal_prd_msg;
>  
> +enum occ_cmd {
> + OCC_CMD_AMESTER_PASS_THRU = 0,
> + OCC_CMD_CLEAR_SENSOR_DATA,
> + OCC_CMD_SET_POWER_CAP,
> + OCC_CMD_SET_POWER_SHIFTING_RATIO,
> + OCC_CMD_LAST
> +};
> +
> +struct opal_occ_cmd_rsp_msg {
> + u8 *cdata;
> + u8 *rdata;
> + __be16 cdata_size;
> + __be16 rdata_size;
> + u8 cmd;
> + u8 request_id;
> + u8 status;
> +};
> +
> +struct opal_occ_cmd_data {
> + u16 size;
> + u8 cmd;
> + u8 data[];
> +};
> +
> +struct opal_occ_rsp_data {
> + u16 size;
> + u8 status;
> + u8 data[];
> +};
> +
> +#define MAX_OPAL_CMD_DATA_LENGTH4090
> +#define MAX_OCC_RSP_DATA_LENGTH 8698
> +
>  #define OCC_RESET   0
>  #define OCC_LOAD1
>  #define OCC_THROTTLE2
> diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
> index 03ed493..e55ed79 100644
> --- a/arch/powerpc/include/asm/opal.h
> +++ b/arch/powerpc/include/asm/opal.h
> @@ -346,6 +346,9 @@ static inline int opal_get_async_rc(struct opal_msg msg)
>  
>  void opal_wake_poller(void);
>  
> +int64_t opal_occ_command(int chip_id, struct opal_occ_cmd_rsp_msg *msg,
> +  bool retry);
> +
>  #endif /* __ASSEMBLY__ */
>  
>  #endif /* _ASM_POWERPC_OPAL_H */
> diff --git a/arch/powerpc/platforms/powernv/Makefile 
> b/arch/powerpc/platforms/powernv/Makefile
> index b5d98cb..f5f0902 100644
> --- a/arch/powerpc/platforms/powernv/Makefile
> +++ b/arch/powerpc/platforms/powernv/Makefile
> @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o 
> opal-async.o idle.o
>  obj-y+= opal-rtc.o opal-nvram.o opal-lpc.o 
> opal-flash.o
>  obj-y+= rng.o opal-elog.o opal-dump.o 
> opal-sysparam.o opal-sensor.o
>  obj-y+= o

Re: [PATCH V9 1/3] powernv: powercap: Add support for powercap framework

2017-07-30 Thread Cyril Bur
On Mon, 2017-07-31 at 07:54 +0530, Shilpasri G Bhat wrote:
> Adds a generic powercap framework to change the system powercap
> inband through OPAL-OCC command/response interface.
> 
> Signed-off-by: Shilpasri G Bhat 
> ---
> Changes from V8:
> - Use __pa() while passing pointer in opal call
> - Use mutex_lock_interruptible()
> - Fix error codes returned to user
> - Allocate and add sysfs attributes in a single loop
> 
>  arch/powerpc/include/asm/opal-api.h|   5 +-
>  arch/powerpc/include/asm/opal.h|   4 +
>  arch/powerpc/platforms/powernv/Makefile|   2 +-
>  arch/powerpc/platforms/powernv/opal-powercap.c | 243 
> +
>  arch/powerpc/platforms/powernv/opal-wrappers.S |   2 +
>  arch/powerpc/platforms/powernv/opal.c  |   4 +
>  6 files changed, 258 insertions(+), 2 deletions(-)
>  create mode 100644 arch/powerpc/platforms/powernv/opal-powercap.c
> 
> diff --git a/arch/powerpc/include/asm/opal-api.h 
> b/arch/powerpc/include/asm/opal-api.h
> index 3130a73..c3e0c4a 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -42,6 +42,7 @@
>  #define OPAL_I2C_STOP_ERR-24
>  #define OPAL_XIVE_PROVISIONING   -31
>  #define OPAL_XIVE_FREE_ACTIVE-32
> +#define OPAL_TIMEOUT -33
>  
>  /* API Tokens (in r0) */
>  #define OPAL_INVALID_CALL   -1
> @@ -190,7 +191,9 @@
>  #define OPAL_NPU_INIT_CONTEXT146
>  #define OPAL_NPU_DESTROY_CONTEXT 147
>  #define OPAL_NPU_MAP_LPAR148
> -#define OPAL_LAST148
> +#define OPAL_GET_POWERCAP152
> +#define OPAL_SET_POWERCAP153
> +#define OPAL_LAST153
>  
>  /* Device tree flags */
>  
> diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
> index 588fb1c..ec2087c 100644
> --- a/arch/powerpc/include/asm/opal.h
> +++ b/arch/powerpc/include/asm/opal.h
> @@ -267,6 +267,8 @@ int64_t opal_xive_set_vp_info(uint64_t vp,
>  int64_t opal_xive_free_irq(uint32_t girq);
>  int64_t opal_xive_sync(uint32_t type, uint32_t id);
>  int64_t opal_xive_dump(uint32_t type, uint32_t id);
> +int opal_get_powercap(u32 handle, int token, u32 *pcap);
> +int opal_set_powercap(u32 handle, int token, u32 pcap);
>  
>  /* Internal functions */
>  extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
> @@ -345,6 +347,8 @@ static inline int opal_get_async_rc(struct opal_msg msg)
>  
>  void opal_wake_poller(void);
>  
> +void opal_powercap_init(void);
> +
>  #endif /* __ASSEMBLY__ */
>  
>  #endif /* _ASM_POWERPC_OPAL_H */
> diff --git a/arch/powerpc/platforms/powernv/Makefile 
> b/arch/powerpc/platforms/powernv/Makefile
> index b5d98cb..e79f806 100644
> --- a/arch/powerpc/platforms/powernv/Makefile
> +++ b/arch/powerpc/platforms/powernv/Makefile
> @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o 
> opal-async.o idle.o
>  obj-y+= opal-rtc.o opal-nvram.o opal-lpc.o 
> opal-flash.o
>  obj-y+= rng.o opal-elog.o opal-dump.o 
> opal-sysparam.o opal-sensor.o
>  obj-y+= opal-msglog.o opal-hmi.o opal-power.o 
> opal-irqchip.o
> -obj-y+= opal-kmsg.o
> +obj-y+= opal-kmsg.o opal-powercap.o
>  
>  obj-$(CONFIG_SMP)+= smp.o subcore.o subcore-asm.o
>  obj-$(CONFIG_PCI)+= pci.o pci-ioda.o npu-dma.o
> diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c 
> b/arch/powerpc/platforms/powernv/opal-powercap.c
> new file mode 100644
> index 000..9be5093
> --- /dev/null
> +++ b/arch/powerpc/platforms/powernv/opal-powercap.c
> @@ -0,0 +1,243 @@
> +/*
> + * PowerNV OPAL Powercap interface
> + *
> + * Copyright 2017 IBM Corp.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#define pr_fmt(fmt) "opal-powercap: " fmt
> +
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +
> +DEFINE_MUTEX(powercap_mutex);
> +
> +static struct kobject *powercap_kobj;
> +
> +struct powercap_attr {
> + u32 handle;
> + struct kobj_attribute attr;
> +};
> +
> +static struct pcap {
> + struct attribute_group pg;
> + struct powercap_attr *pattrs;
> +} *pcaps;
> +
> +static ssize_t powercap_show(struct kobject *kobj, struct kobj_attribute 
> *attr,
> +  char *buf)
> +{
> + struct powercap_attr *pcap_attr = container_of(attr,
> + struct powercap_attr, attr);
> + struct opal_msg msg;
> + u32 pcap;
> + int ret, token;
> +
> + token = opal_async_get_token_interruptible();
> + if (token < 0) {
> + pr_devel("Failed to get

Re: [PATCH V3 2/2] powerpc/powernv : Add support for OPAL-OCC command/response interface

2017-06-18 Thread Cyril Bur
On Mon, 2017-06-19 at 11:44 +0530, Shilpasri G Bhat wrote:
> In P9, OCC (On-Chip-Controller) supports shared memory based
> commad-response interface. Within the shared memory there is an OPAL
> command buffer and OCC response buffer that can be used to send
> inband commands to OCC. This patch adds a platform driver to support
> the command/response interface between OCC and the host.
> 

Hi Shilpasri,

Looks really nice now! I do have a few questions on the new stuff
around locking for read()ing the response. I've put one inline in the
read() code, feel free to disagree :)

My other question is now sort of the opposite problem, what if you have
two threads, one does a write() so now your read()ing mutex allows
reads and then thread A does write() at the 'same' time as thread B
does read(). Is there a risk of corruption? Would it be good to also
hold cmd_in_progress in read().

I'm also assuming that two writes is fine and that loosing the previous
response is by design - users might not care about the response, or
they might want ignore it or (as often happens in userspace) they don't
feel the need to check return codes.

> Signed-off-by: Shilpasri G Bhat 
> ---
> Changes from V2:
> - Use atomic_set while dropping locks
> - Add rsp_consumed flag to protect the read()
> - Change the logging level of prints
> - Add MAX_POSSIBLE_CHIPS
> - Move the endian conversion of data to opal_occ_cmd_prepare()
> 
>  arch/powerpc/include/asm/opal-api.h|  41 +++-
>  arch/powerpc/include/asm/opal.h|   3 +
>  arch/powerpc/platforms/powernv/Makefile|   2 +-
>  arch/powerpc/platforms/powernv/opal-occ.c  | 307 
> +
>  arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
>  arch/powerpc/platforms/powernv/opal.c  |   8 +
>  6 files changed, 360 insertions(+), 2 deletions(-)
>  create mode 100644 arch/powerpc/platforms/powernv/opal-occ.c
> 
> diff --git a/arch/powerpc/include/asm/opal-api.h 
> b/arch/powerpc/include/asm/opal-api.h
> index cb3e624..011d86c 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -42,6 +42,10 @@
>  #define OPAL_I2C_STOP_ERR-24
>  #define OPAL_XIVE_PROVISIONING   -31
>  #define OPAL_XIVE_FREE_ACTIVE-32
> +#define OPAL_OCC_INVALID_STATE   -33
> +#define OPAL_OCC_BUSY-34
> +#define OPAL_OCC_CMD_TIMEOUT -35
> +#define OPAL_OCC_RSP_MISMATCH-36
>  
>  /* API Tokens (in r0) */
>  #define OPAL_INVALID_CALL   -1
> @@ -190,7 +194,8 @@
>  #define OPAL_NPU_INIT_CONTEXT146
>  #define OPAL_NPU_DESTROY_CONTEXT 147
>  #define OPAL_NPU_MAP_LPAR148
> -#define OPAL_LAST148
> +#define OPAL_OCC_COMMAND 149
> +#define OPAL_LAST149
>  
>  /* Device tree flags */
>  
> @@ -829,6 +834,40 @@ struct opal_prd_msg_header {
>  
>  struct opal_prd_msg;
>  
> +enum occ_cmd {
> + OCC_CMD_AMESTER_PASS_THRU = 0,
> + OCC_CMD_CLEAR_SENSOR_DATA,
> + OCC_CMD_SET_POWER_CAP,
> + OCC_CMD_SET_POWER_SHIFTING_RATIO,
> + OCC_CMD_SELECT_SENSOR_GROUPS,
> + OCC_CMD_LAST
> +};
> +
> +struct opal_occ_cmd_rsp_msg {
> + __be64 cdata;
> + __be64 rdata;
> + __be16 cdata_size;
> + __be16 rdata_size;
> + u8 cmd;
> + u8 request_id;
> + u8 status;
> +};
> +
> +struct opal_occ_cmd_data {
> + __be16 size;
> + u8 cmd;
> + u8 data[];
> +};
> +
> +struct opal_occ_rsp_data {
> + __be16 size;
> + u8 status;
> + u8 data[];
> +};
> +
> +#define MAX_OPAL_CMD_DATA_LENGTH4090
> +#define MAX_OCC_RSP_DATA_LENGTH 8698
> +
>  #define OCC_RESET   0
>  #define OCC_LOAD1
>  #define OCC_THROTTLE2
> diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
> index 03ed493..e55ed79 100644
> --- a/arch/powerpc/include/asm/opal.h
> +++ b/arch/powerpc/include/asm/opal.h
> @@ -346,6 +346,9 @@ static inline int opal_get_async_rc(struct opal_msg msg)
>  
>  void opal_wake_poller(void);
>  
> +int64_t opal_occ_command(int chip_id, struct opal_occ_cmd_rsp_msg *msg,
> +  bool retry);
> +
>  #endif /* __ASSEMBLY__ */
>  
>  #endif /* _ASM_POWERPC_OPAL_H */
> diff --git a/arch/powerpc/platforms/powernv/Makefile 
> b/arch/powerpc/platforms/powernv/Makefile
> index b5d98cb..f5f0902 100644
> --- a/arch/powerpc/platforms/powernv/Makefile
> +++ b/arch/powerpc/platforms/powernv/Makefile
> @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o 
> opal-async.o idle.o
>  obj-y+= opal-rtc.o opal-nvram.o opal-lpc.o 
> opal-flash.o
>  obj-y+= rng.o opal-elog.o opal-dump.o 
> opal-sysparam.o opal-sensor.o
>  obj-y+= opal-msglog.o opal-hmi.o opal-power.o 
> opal-irqchip.o
> -obj-y+= opal-kmsg.o

Re: [PATCH V8 1/3] powernv: powercap: Add support for powercap framework

2017-07-27 Thread Cyril Bur
On Wed, 2017-07-26 at 10:35 +0530, Shilpasri G Bhat wrote:
> Adds a generic powercap framework to change the system powercap
> inband through OPAL-OCC command/response interface.
> 
> Signed-off-by: Shilpasri G Bhat 
> ---
> Changes from V7:
> - Replaced sscanf with kstrtoint
> 
>  arch/powerpc/include/asm/opal-api.h|   5 +-
>  arch/powerpc/include/asm/opal.h|   4 +
>  arch/powerpc/platforms/powernv/Makefile|   2 +-
>  arch/powerpc/platforms/powernv/opal-powercap.c | 237 
> +
>  arch/powerpc/platforms/powernv/opal-wrappers.S |   2 +
>  arch/powerpc/platforms/powernv/opal.c  |   4 +
>  6 files changed, 252 insertions(+), 2 deletions(-)
>  create mode 100644 arch/powerpc/platforms/powernv/opal-powercap.c
> 
> diff --git a/arch/powerpc/include/asm/opal-api.h 
> b/arch/powerpc/include/asm/opal-api.h
> index 3130a73..c3e0c4a 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -42,6 +42,7 @@
>  #define OPAL_I2C_STOP_ERR-24
>  #define OPAL_XIVE_PROVISIONING   -31
>  #define OPAL_XIVE_FREE_ACTIVE-32
> +#define OPAL_TIMEOUT -33
>  
>  /* API Tokens (in r0) */
>  #define OPAL_INVALID_CALL   -1
> @@ -190,7 +191,9 @@
>  #define OPAL_NPU_INIT_CONTEXT146
>  #define OPAL_NPU_DESTROY_CONTEXT 147
>  #define OPAL_NPU_MAP_LPAR148
> -#define OPAL_LAST148
> +#define OPAL_GET_POWERCAP152
> +#define OPAL_SET_POWERCAP153
> +#define OPAL_LAST153
>  
>  /* Device tree flags */
>  
> diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
> index 588fb1c..ec2087c 100644
> --- a/arch/powerpc/include/asm/opal.h
> +++ b/arch/powerpc/include/asm/opal.h
> @@ -267,6 +267,8 @@ int64_t opal_xive_set_vp_info(uint64_t vp,
>  int64_t opal_xive_free_irq(uint32_t girq);
>  int64_t opal_xive_sync(uint32_t type, uint32_t id);
>  int64_t opal_xive_dump(uint32_t type, uint32_t id);
> +int opal_get_powercap(u32 handle, int token, u32 *pcap);
> +int opal_set_powercap(u32 handle, int token, u32 pcap);
>  
>  /* Internal functions */
>  extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
> @@ -345,6 +347,8 @@ static inline int opal_get_async_rc(struct opal_msg msg)
>  
>  void opal_wake_poller(void);
>  
> +void opal_powercap_init(void);
> +
>  #endif /* __ASSEMBLY__ */
>  
>  #endif /* _ASM_POWERPC_OPAL_H */
> diff --git a/arch/powerpc/platforms/powernv/Makefile 
> b/arch/powerpc/platforms/powernv/Makefile
> index b5d98cb..e79f806 100644
> --- a/arch/powerpc/platforms/powernv/Makefile
> +++ b/arch/powerpc/platforms/powernv/Makefile
> @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o 
> opal-async.o idle.o
>  obj-y+= opal-rtc.o opal-nvram.o opal-lpc.o 
> opal-flash.o
>  obj-y+= rng.o opal-elog.o opal-dump.o 
> opal-sysparam.o opal-sensor.o
>  obj-y+= opal-msglog.o opal-hmi.o opal-power.o 
> opal-irqchip.o
> -obj-y+= opal-kmsg.o
> +obj-y+= opal-kmsg.o opal-powercap.o
>  
>  obj-$(CONFIG_SMP)+= smp.o subcore.o subcore-asm.o
>  obj-$(CONFIG_PCI)+= pci.o pci-ioda.o npu-dma.o
> diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c 
> b/arch/powerpc/platforms/powernv/opal-powercap.c
> new file mode 100644
> index 000..7c57f4b
> --- /dev/null
> +++ b/arch/powerpc/platforms/powernv/opal-powercap.c
> @@ -0,0 +1,237 @@
> +/*
> + * PowerNV OPAL Powercap interface
> + *
> + * Copyright 2017 IBM Corp.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#define pr_fmt(fmt) "opal-powercap: " fmt
> +
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +
> +DEFINE_MUTEX(powercap_mutex);
> +
> +static struct kobject *powercap_kobj;
> +
> +struct powercap_attr {
> + u32 handle;
> + struct kobj_attribute attr;
> +};
> +
> +static struct attribute_group *pattr_groups;
> +static struct powercap_attr *pcap_attrs;
> +
> +static ssize_t powercap_show(struct kobject *kobj, struct kobj_attribute 
> *attr,
> +  char *buf)
> +{
> + struct powercap_attr *pcap_attr = container_of(attr,
> + struct powercap_attr, attr);
> + struct opal_msg msg;
> + u32 pcap;
> + int ret, token;
> +
> + token = opal_async_get_token_interruptible();
> + if (token < 0) {
> + pr_devel("Failed to get token\n");
> + return token;
> + }
> +
> + mutex_lock(&powercap_mutex);

If this is purely a userspace interface, shouldn't this be
mutex_l

Re: [PATCH V8 2/3] powernv: Add support to set power-shifting-ratio

2017-07-27 Thread Cyril Bur
On Wed, 2017-07-26 at 10:35 +0530, Shilpasri G Bhat wrote:
> This patch adds support to set power-shifting-ratio for CPU-GPU which
> is used by OCC power capping algorithm.
> 
> Signed-off-by: Shilpasri G Bhat 


Hi Shilpasri,

I started looking though this - a lot the comments to patch 1/3 apply
here so I'll stop repeating myself :).


Thanks,

Cyril
> ---
> Changes from V7:
> - Replaced sscanf with kstrtoint
> 
>  arch/powerpc/include/asm/opal-api.h|   4 +-
>  arch/powerpc/include/asm/opal.h|   3 +
>  arch/powerpc/platforms/powernv/Makefile|   2 +-
>  arch/powerpc/platforms/powernv/opal-psr.c  | 169 
> +
>  arch/powerpc/platforms/powernv/opal-wrappers.S |   2 +
>  arch/powerpc/platforms/powernv/opal.c  |   3 +
>  6 files changed, 181 insertions(+), 2 deletions(-)
>  create mode 100644 arch/powerpc/platforms/powernv/opal-psr.c
> 
> diff --git a/arch/powerpc/include/asm/opal-api.h 
> b/arch/powerpc/include/asm/opal-api.h
> index c3e0c4a..0d37315 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -193,7 +193,9 @@
>  #define OPAL_NPU_MAP_LPAR148
>  #define OPAL_GET_POWERCAP152
>  #define OPAL_SET_POWERCAP153
> -#define OPAL_LAST153
> +#define OPAL_GET_PSR 154
> +#define OPAL_SET_PSR 155
> +#define OPAL_LAST155
>  
>  /* Device tree flags */
>  
> diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
> index ec2087c..58b30a4 100644
> --- a/arch/powerpc/include/asm/opal.h
> +++ b/arch/powerpc/include/asm/opal.h
> @@ -269,6 +269,8 @@ int64_t opal_xive_set_vp_info(uint64_t vp,
>  int64_t opal_xive_dump(uint32_t type, uint32_t id);
>  int opal_get_powercap(u32 handle, int token, u32 *pcap);
>  int opal_set_powercap(u32 handle, int token, u32 pcap);
> +int opal_get_power_shifting_ratio(u32 handle, int token, u32 *psr);
> +int opal_set_power_shifting_ratio(u32 handle, int token, u32 psr);
>  
>  /* Internal functions */
>  extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
> @@ -348,6 +350,7 @@ static inline int opal_get_async_rc(struct opal_msg msg)
>  void opal_wake_poller(void);
>  
>  void opal_powercap_init(void);
> +void opal_psr_init(void);
>  
>  #endif /* __ASSEMBLY__ */
>  
> diff --git a/arch/powerpc/platforms/powernv/Makefile 
> b/arch/powerpc/platforms/powernv/Makefile
> index e79f806..9ed7d33 100644
> --- a/arch/powerpc/platforms/powernv/Makefile
> +++ b/arch/powerpc/platforms/powernv/Makefile
> @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o 
> opal-async.o idle.o
>  obj-y+= opal-rtc.o opal-nvram.o opal-lpc.o 
> opal-flash.o
>  obj-y+= rng.o opal-elog.o opal-dump.o 
> opal-sysparam.o opal-sensor.o
>  obj-y+= opal-msglog.o opal-hmi.o opal-power.o 
> opal-irqchip.o
> -obj-y+= opal-kmsg.o opal-powercap.o
> +obj-y+= opal-kmsg.o opal-powercap.o opal-psr.o
>  
>  obj-$(CONFIG_SMP)+= smp.o subcore.o subcore-asm.o
>  obj-$(CONFIG_PCI)+= pci.o pci-ioda.o npu-dma.o
> diff --git a/arch/powerpc/platforms/powernv/opal-psr.c 
> b/arch/powerpc/platforms/powernv/opal-psr.c
> new file mode 100644
> index 000..07e3f78
> --- /dev/null
> +++ b/arch/powerpc/platforms/powernv/opal-psr.c
> @@ -0,0 +1,169 @@
> +/*
> + * PowerNV OPAL Power-Shifting-Ratio interface
> + *
> + * Copyright 2017 IBM Corp.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#define pr_fmt(fmt) "opal-psr: " fmt
> +
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +
> +DEFINE_MUTEX(psr_mutex);
> +
> +static struct kobject *psr_kobj;
> +
> +struct psr_attr {
> + u32 handle;
> + struct kobj_attribute attr;
> +};
> +
> +static struct psr_attr *psr_attrs;
> +static struct kobject *psr_kobj;
> +
> +static ssize_t psr_show(struct kobject *kobj, struct kobj_attribute *attr,
> + char *buf)
> +{
> + struct psr_attr *psr_attr = container_of(attr, struct psr_attr, attr);
> + struct opal_msg msg;
> + int psr, ret, token;
> +
> + token = opal_async_get_token_interruptible();
> + if (token < 0) {
> + pr_devel("Failed to get token\n");
> + return token;
> + }
> +
> + mutex_lock(&psr_mutex);
> + ret = opal_get_power_shifting_ratio(psr_attr->handle, token, &psr);

__pa()

> + switch (ret) {
> + case OPAL_ASYNC_COMPLETION:
> + ret = opal_async_wait_response(token, &msg);
> + if (ret) {
> + pr_devel("Failed to wait

Re: [PATCH V8 3/3] powernv: Add support to clear sensor groups data

2017-07-27 Thread Cyril Bur
On Wed, 2017-07-26 at 10:35 +0530, Shilpasri G Bhat wrote:
> Adds support for clearing different sensor groups. OCC inband sensor
> groups like CSM, Profiler, Job Scheduler can be cleared using this
> driver. The min/max of all sensors belonging to these sensor groups
> will be cleared.
> 

Hi Shilpasri,

I think also some comments from v1 also apply here.

Other comments inline

Thanks,

Cyril

> Signed-off-by: Shilpasri G Bhat 
> ---
> Changes from V7:
> - s/send_occ_command/opal_sensor_groups_clear_history
> 
>  arch/powerpc/include/asm/opal-api.h|   3 +-
>  arch/powerpc/include/asm/opal.h|   2 +
>  arch/powerpc/include/uapi/asm/opal-occ.h   |  23 ++
>  arch/powerpc/platforms/powernv/Makefile|   2 +-
>  arch/powerpc/platforms/powernv/opal-occ.c  | 109 
> +
>  arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
>  arch/powerpc/platforms/powernv/opal.c  |   3 +
>  7 files changed, 141 insertions(+), 2 deletions(-)
>  create mode 100644 arch/powerpc/include/uapi/asm/opal-occ.h
>  create mode 100644 arch/powerpc/platforms/powernv/opal-occ.c
> 
> diff --git a/arch/powerpc/include/asm/opal-api.h 
> b/arch/powerpc/include/asm/opal-api.h
> index 0d37315..342738a 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -195,7 +195,8 @@
>  #define OPAL_SET_POWERCAP153
>  #define OPAL_GET_PSR 154
>  #define OPAL_SET_PSR 155
> -#define OPAL_LAST155
> +#define OPAL_SENSOR_GROUPS_CLEAR 156
> +#define OPAL_LAST156
>  
>  /* Device tree flags */
>  
> diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
> index 58b30a4..92db6af 100644
> --- a/arch/powerpc/include/asm/opal.h
> +++ b/arch/powerpc/include/asm/opal.h
> @@ -271,6 +271,7 @@ int64_t opal_xive_set_vp_info(uint64_t vp,
>  int opal_set_powercap(u32 handle, int token, u32 pcap);
>  int opal_get_power_shifting_ratio(u32 handle, int token, u32 *psr);
>  int opal_set_power_shifting_ratio(u32 handle, int token, u32 psr);
> +int opal_sensor_groups_clear(u32 group_hndl, int token);
>  
>  /* Internal functions */
>  extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
> @@ -351,6 +352,7 @@ static inline int opal_get_async_rc(struct opal_msg msg)
>  
>  void opal_powercap_init(void);
>  void opal_psr_init(void);
> +int opal_sensor_groups_clear_history(u32 handle);
>  
>  #endif /* __ASSEMBLY__ */
>  
> diff --git a/arch/powerpc/include/uapi/asm/opal-occ.h 
> b/arch/powerpc/include/uapi/asm/opal-occ.h
> new file mode 100644
> index 000..97c45e2
> --- /dev/null
> +++ b/arch/powerpc/include/uapi/asm/opal-occ.h
> @@ -0,0 +1,23 @@
> +/*
> + * OPAL OCC command interface
> + * Supported on POWERNV platform
> + *
> + * (C) Copyright IBM 2017
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2, or (at your option)
> + * any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#ifndef _UAPI_ASM_POWERPC_OPAL_OCC_H_
> +#define _UAPI_ASM_POWERPC_OPAL_OCC_H_
> +
> +#define OPAL_OCC_IOCTL_CLEAR_SENSOR_GROUPS   _IOR('o', 1, u32)
> +
> +#endif /* _UAPI_ASM_POWERPC_OPAL_OCC_H */
> diff --git a/arch/powerpc/platforms/powernv/Makefile 
> b/arch/powerpc/platforms/powernv/Makefile
> index 9ed7d33..f193b33 100644
> --- a/arch/powerpc/platforms/powernv/Makefile
> +++ b/arch/powerpc/platforms/powernv/Makefile
> @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o 
> opal-async.o idle.o
>  obj-y+= opal-rtc.o opal-nvram.o opal-lpc.o 
> opal-flash.o
>  obj-y+= rng.o opal-elog.o opal-dump.o 
> opal-sysparam.o opal-sensor.o
>  obj-y+= opal-msglog.o opal-hmi.o opal-power.o 
> opal-irqchip.o
> -obj-y+= opal-kmsg.o opal-powercap.o opal-psr.o
> +obj-y+= opal-kmsg.o opal-powercap.o opal-psr.o 
> opal-occ.o
>  
>  obj-$(CONFIG_SMP)+= smp.o subcore.o subcore-asm.o
>  obj-$(CONFIG_PCI)+= pci.o pci-ioda.o npu-dma.o
> diff --git a/arch/powerpc/platforms/powernv/opal-occ.c 
> b/arch/powerpc/platforms/powernv/opal-occ.c
> new file mode 100644
> index 000..d1d4b28
> --- /dev/null
> +++ b/arch/powerpc/platforms/powernv/opal-occ.c
> @@ -0,0 +1,109 @@
> +/*
> + * Copyright IBM Corporation 2017
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free