Re: [PATCH v14 01/15] selftests/powerpc: Add more SPR numbers, TM & VMX instructions to 'reg.h'
On Mon, 2016-09-12 at 15:33 +0800, wei.guo.si...@gmail.com wrote: > From: Anshuman Khandual > > This patch adds SPR number for TAR, PPR, DSCR special > purpose registers. It also adds TM, VSX, VMX related > instructions which will then be used by patches later > in the series. > > Signed-off-by: Anshuman Khandual > Signed-off-by: Simon Guo > --- > tools/testing/selftests/powerpc/reg.h | 42 > --- > 1 file changed, 39 insertions(+), 3 deletions(-) > > diff --git a/tools/testing/selftests/powerpc/reg.h > b/tools/testing/selftests/powerpc/reg.h > index fddf368..fee7be9 100644 > --- a/tools/testing/selftests/powerpc/reg.h > +++ b/tools/testing/selftests/powerpc/reg.h > @@ -18,6 +18,19 @@ > > #define mb() asm volatile("sync" : : : "memory"); > > +/* Vector Instructions */ > +#define VSX_XX1(xs, ra, rb) (((xs) & 0x1f) << 21 | ((ra) << > 16) | \ > + ((rb) << 11) | (((xs) >> 5))) > +#define STXVD2X(xs, ra, rb) .long (0x7c000798 | VSX_XX1((xs), > (ra), (rb))) > +#define LXVD2X(xs, ra, rb) .long (0x7c000698 | VSX_XX1((xs), > (ra), (rb))) Theres an instructions.h file in tools/testing/selftests/powerpc/ would these be better suited there? > + > +/* TM instructions */ > +#define TBEGIN ".long 0x7C00051D;" > +#define TABORT ".long 0x7C00071D;" > +#define TEND ".long 0x7C00055D;" > +#define TSUSPEND ".long 0x7C0005DD;" > +#define TRESUME ".long 0x7C2005DD;" > + These are only useful on old compilers that don't know about TM. For selftests I would discourage creating these and using the actual instructions, they are fairly well known today by most compilers. > #define SPRN_MMCR2 769 > #define SPRN_MMCRA 770 > #define SPRN_MMCR0 779 > @@ -51,10 +64,33 @@ > #define SPRN_SDAR 781 > #define SPRN_SIER 768 > > -#define SPRN_TEXASR 0x82 > +#define SPRN_TEXASR 0x82/* Transaction Exception and Status > Register */ > #define SPRN_TFIAR 0x81/* Transaction Failure Inst > Addr*/ > #define SPRN_TFHAR 0x80/* Transaction Failure Handler Addr > */ > -#define TEXASR_FS 0x0800 > -#define SPRN_TAR0x32f > +#define SPRN_TAR0x32f/* Target Address Register */ > + > +#define SPRN_DSCR_PRIV 0x11 /* Privilege State DSCR */ > +#define SPRN_DSCR 0x03 /* Data Stream Control Register > */ > +#define SPRN_PPR 896 /* Program Priority Register */ > + > +/* TEXASR register bits */ > +#define TEXASR_FC0xFE00 > +#define TEXASR_FP0x0100 > +#define TEXASR_DA0x0080 > +#define TEXASR_NO0x0040 > +#define TEXASR_FO0x0020 > +#define TEXASR_SIC 0x0010 > +#define TEXASR_NTC 0x0008 > +#define TEXASR_TC0x0004 > +#define TEXASR_TIC 0x0002 > +#define TEXASR_IC0x0001 > +#define TEXASR_IFC 0x8000 > +#define TEXASR_ABT 0x0001 > +#define TEXASR_SPD 0x8000 > +#define TEXASR_HV0x2000 > +#define TEXASR_PR0x1000 > +#define TEXASR_FS0x0800 > +#define TEXASR_TE0x0400 > +#define TEXASR_ROT 0x0200 > > #endif /* _SELFTESTS_POWERPC_REG_H */
Re: [PATCH v14 05/15] selftests/powerpc: Add ptrace tests for GPR/FPR registers in TM
On Mon, 2016-09-12 at 15:33 +0800, wei.guo.si...@gmail.com wrote: > From: Anshuman Khandual > > This patch adds ptrace interface test for GPR/FPR registers > inside TM context. This adds ptrace interface based helper > functions related to checkpointed GPR/FPR access. > > Signed-off-by: Anshuman Khandual > Signed-off-by: Simon Guo > --- > tools/testing/selftests/powerpc/ptrace/Makefile| 3 +- > .../selftests/powerpc/ptrace/ptrace-tm-gpr.c | 296 > + > 2 files changed, 298 insertions(+), 1 deletion(-) > create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-tm- > gpr.c > > diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile > b/tools/testing/selftests/powerpc/ptrace/Makefile > index 31e8e33..170683a 100644 > --- a/tools/testing/selftests/powerpc/ptrace/Makefile > +++ b/tools/testing/selftests/powerpc/ptrace/Makefile > @@ -1,8 +1,9 @@ > -TEST_PROGS := ptrace-ebb ptrace-gpr > +TEST_PROGS := ptrace-ebb ptrace-gpr ptrace-tm-gpr > > all: $(TEST_PROGS) > CFLAGS += -m64 > $(TEST_PROGS): ../harness.c ptrace.S ../utils.c ptrace.h > ptrace-ebb: ../pmu/event.c ../pmu/lib.c ../pmu/ebb/ebb_handler.S > ../pmu/ebb/busy_loop.S > + > clean: > rm -f $(TEST_PROGS) *.o > diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c > b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c > new file mode 100644 > index 000..8417d04 > --- /dev/null > +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c > @@ -0,0 +1,296 @@ > +/* > + * Ptrace test for GPR/FPR registers in TM context > + * > + * Copyright (C) 2015 Anshuman Khandual, IBM Corporation. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + */ > +#include "ptrace.h" > +#include "ptrace-gpr.h" > + > +/* Tracer and Tracee Shared Data */ > +int shm_id; > +volatile unsigned long *cptr, *pptr; > + > +float a = FPR_1; > +float b = FPR_2; > +float c = FPR_3; > + > +void tm_gpr(void) > +{ > + unsigned long gpr_buf[18]; > + unsigned long result, texasr; > + float fpr_buf[32]; > + > + printf("Starting the child\n"); > + cptr = (unsigned long *)shmat(shm_id, NULL, 0); > + > +trans: > + cptr[1] = 0; > + asm __volatile__( > + > + "li 14, %[gpr_1];" > + "li 15, %[gpr_1];" > + "li 16, %[gpr_1];" > + "li 17, %[gpr_1];" > + "li 18, %[gpr_1];" > + "li 19, %[gpr_1];" > + "li 20, %[gpr_1];" > + "li 21, %[gpr_1];" > + "li 22, %[gpr_1];" > + "li 23, %[gpr_1];" > + "li 24, %[gpr_1];" > + "li 25, %[gpr_1];" > + "li 26, %[gpr_1];" > + "li 27, %[gpr_1];" > + "li 28, %[gpr_1];" > + "li 29, %[gpr_1];" > + "li 30, %[gpr_1];" > + "li 31, %[gpr_1];" > + > + "lfs 0, 0(%[flt_1]);" > + "lfs 1, 0(%[flt_1]);" > + "lfs 2, 0(%[flt_1]);" > + "lfs 3, 0(%[flt_1]);" > + "lfs 4, 0(%[flt_1]);" > + "lfs 5, 0(%[flt_1]);" > + "lfs 6, 0(%[flt_1]);" > + "lfs 7, 0(%[flt_1]);" > + "lfs 8, 0(%[flt_1]);" > + "lfs 9, 0(%[flt_1]);" > + "lfs 10, 0(%[flt_1]);" > + "lfs 11, 0(%[flt_1]);" > + "lfs 12, 0(%[flt_1]);" > + "lfs 13, 0(%[flt_1]);" > + "lfs 14, 0(%[flt_1]);" > + "lfs 15, 0(%[flt_1]);" > + "lfs 16, 0(%[flt_1]);" > + "lfs 17, 0(%[flt_1]);" > + "lfs 18, 0(%[flt_1]);" > + "lfs 19, 0(%[flt_1]);" > + "lfs 20, 0(%[flt_1]);" > + "lfs 21, 0(%[flt_1]);" > + "lfs 22, 0(%[flt_1]);" > + "lfs 23, 0(%[flt_1]);" > + "lfs 24, 0(%[flt_1]);" > + "lfs 25, 0(%[flt_1]);" > + "lfs 26, 0(%[flt_1]);" > + "lfs 27, 0(%[flt_1]);" > + "lfs 28, 0(%[flt_1]);" > + "lfs 29, 0(%[flt_1]);" > + "lfs 30, 0(%[flt_1]);" > + "lfs 31, 0(%[flt_1]);" > + There was this in the previous patch? Can we consolidate? > + "1: ;" > + TBEGIN tbegin. is probably fine > + "beq 2f;" > + > + "li 14, %[gpr_2];" > + "li 15, %[gpr_2];" > + "li 16, %[gpr_2];" > + "li 17, %[gpr_2];" > + "li 18, %[gpr_2];" > + "li 19, %[gpr_2];" > + "li 20, %[gpr_2];" > + "li 21, %[gpr_2];" > + "li 22, %[gpr_2];" > + "li 23, %[gpr_2];" > + "li 24, %[gpr_2];" > + "li 25, %[gpr_2];" > + "li 26, %[gpr_2];" > + "li 27, %[gpr_2];" > + "li 28, %[gpr_2];" > +
Re: [PATCH v14 04/15] selftests/powerpc: Add ptrace tests for GPR/FPR registers
On Mon, 2016-09-12 at 15:33 +0800, wei.guo.si...@gmail.com wrote: > From: Anshuman Khandual > > This patch adds ptrace interface test for GPR/FPR registers. > This adds ptrace interface based helper functions related to > GPR/FPR access and some assembly helper functions related to > GPR/FPR registers. > > Signed-off-by: Anshuman Khandual > Signed-off-by: Simon Guo > --- > tools/testing/selftests/powerpc/ptrace/Makefile| 3 +- > .../testing/selftests/powerpc/ptrace/ptrace-gpr.c | 196 > +++ > .../testing/selftests/powerpc/ptrace/ptrace-gpr.h | 74 > tools/testing/selftests/powerpc/ptrace/ptrace.S| 131 > + > tools/testing/selftests/powerpc/ptrace/ptrace.h| 211 > + > 5 files changed, 614 insertions(+), 1 deletion(-) > create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace- > gpr.c > create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace- > gpr.h > create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace.S > [snip] > new file mode 100644 > index 000..193beea > --- /dev/null > +++ b/tools/testing/selftests/powerpc/ptrace/ptrace.S > @@ -0,0 +1,131 @@ > +/* > + * Ptrace interface test helper assembly functions > + * > + * Copyright (C) 2015 Anshuman Khandual, IBM Corporation. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + */ > +#include > +#include "../reg.h" > + > + > +/* Non volatile GPR - unsigned long buf[18] */ > +FUNC_START(load_gpr) > + ld 14, 0*8(3) > + ld 15, 1*8(3) > + ld 16, 2*8(3) > + ld 17, 3*8(3) > + ld 18, 4*8(3) > + ld 19, 5*8(3) > + ld 20, 6*8(3) > + ld 21, 7*8(3) > + ld 22, 8*8(3) > + ld 23, 9*8(3) > + ld 24, 10*8(3) > + ld 25, 11*8(3) > + ld 26, 12*8(3) > + ld 27, 13*8(3) > + ld 28, 14*8(3) > + ld 29, 15*8(3) > + ld 30, 16*8(3) > + ld 31, 17*8(3) > + blr > +FUNC_END(load_gpr) > + > +FUNC_START(store_gpr) > + std 14, 0*8(3) > + std 15, 1*8(3) > + std 16, 2*8(3) > + std 17, 3*8(3) > + std 18, 4*8(3) > + std 19, 5*8(3) > + std 20, 6*8(3) > + std 21, 7*8(3) > + std 22, 8*8(3) > + std 23, 9*8(3) > + std 24, 10*8(3) > + std 25, 11*8(3) > + std 26, 12*8(3) > + std 27, 13*8(3) > + std 28, 14*8(3) > + std 29, 15*8(3) > + std 30, 16*8(3) > + std 31, 17*8(3) > + blr > +FUNC_END(store_gpr) > + > +/* Single Precision Float - float buf[32] */ > +FUNC_START(load_fpr) > + lfs 0, 0*4(3) > + lfs 1, 1*4(3) > + lfs 2, 2*4(3) > + lfs 3, 3*4(3) > + lfs 4, 4*4(3) > + lfs 5, 5*4(3) > + lfs 6, 6*4(3) > + lfs 7, 7*4(3) > + lfs 8, 8*4(3) > + lfs 9, 9*4(3) > + lfs 10, 10*4(3) > + lfs 11, 11*4(3) > + lfs 12, 12*4(3) > + lfs 13, 13*4(3) > + lfs 14, 14*4(3) > + lfs 15, 15*4(3) > + lfs 16, 16*4(3) > + lfs 17, 17*4(3) > + lfs 18, 18*4(3) > + lfs 19, 19*4(3) > + lfs 20, 20*4(3) > + lfs 21, 21*4(3) > + lfs 22, 22*4(3) > + lfs 23, 23*4(3) > + lfs 24, 24*4(3) > + lfs 25, 25*4(3) > + lfs 26, 26*4(3) > + lfs 27, 27*4(3) > + lfs 28, 28*4(3) > + lfs 29, 29*4(3) > + lfs 30, 30*4(3) > + lfs 31, 31*4(3) > + blr > +FUNC_END(load_fpr) > + > +FUNC_START(store_fpr) > + stfs 0, 0*4(3) > + stfs 1, 1*4(3) > + stfs 2, 2*4(3) > + stfs 3, 3*4(3) > + stfs 4, 4*4(3) > + stfs 5, 5*4(3) > + stfs 6, 6*4(3) > + stfs 7, 7*4(3) > + stfs 8, 8*4(3) > + stfs 9, 9*4(3) > + stfs 10, 10*4(3) > + stfs 11, 11*4(3) > + stfs 12, 12*4(3) > + stfs 13, 13*4(3) > + stfs 14, 14*4(3) > + stfs 15, 15*4(3) > + stfs 16, 16*4(3) > + stfs 17, 17*4(3) > + stfs 18, 18*4(3) > + stfs 19, 19*4(3) > + stfs 20, 20*4(3) > + stfs 21, 21*4(3) > + stfs 22, 22*4(3) > + stfs 23, 23*4(3) > + stfs 24, 24*4(3) > + stfs 25, 25*4(3) > + stfs 26, 26*4(3) > + stfs 27, 27*4(3) > + stfs 28, 28*4(3) > + stfs 29, 29*4(3) > + stfs 30, 30*4(3) > + stfs 31, 31*4(3) > + blr > +FUNC_END(store_fpr) I wrote similar functions in math/fpu_asm.S perhaps it would be time consolidate those and these into an fpu_asm.S file at a higher level, TM related tests would benefit as well. > diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace.h > b/tools/testing/selftests/powerpc/ptrace/ptrace.h > index fbf73ca..1019004 100644 > --- a/tools/testing/selftests/powerpc/ptrace/ptrace.h > +++ b/tools/testing/selftests/powerpc/ptrace/ptrace.h > @@ -157,6 +157,214 @@ fail: > return TEST_FAIL; > } > [s
Re: [PATCH v14 07/15] selftests/powerpc: Add ptrace tests for TAR, PPR, DSCR registers
On Mon, 2016-09-12 at 15:33 +0800, wei.guo.si...@gmail.com wrote: > From: Anshuman Khandual > > This patch adds ptrace interface test for TAR, PPR, DSCR > registers. This also adds ptrace interface based helper > functions related to TAR, PPR, DSCR register access. > > Signed-off-by: Anshuman Khandual > Signed-off-by: Simon Guo > --- > tools/testing/selftests/powerpc/ptrace/Makefile| 3 +- > .../testing/selftests/powerpc/ptrace/ptrace-tar.c | 159 > ++ > .../testing/selftests/powerpc/ptrace/ptrace-tar.h | 50 ++ > tools/testing/selftests/powerpc/ptrace/ptrace.h| 181 > + > 4 files changed, 392 insertions(+), 1 deletion(-) > create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace- > tar.c > create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace- > tar.h > [snip] > + > +void tar(void) > +{ > + unsigned long reg[3]; > + int ret; > + > + cptr = (int *)shmat(shm_id, NULL, 0); > + printf("%-30s TAR: %u PPR: %lx DSCR: %u\n", > + user_write, TAR_1, PPR_1, DSCR_1); > + > + mtspr(SPRN_TAR, TAR_1); > + mtspr(SPRN_PPR, PPR_1); > + mtspr(SPRN_DSCR, DSCR_1); > + > + cptr[2] = 1; > + > + /* Wait on parent */ > + while (!cptr[0]); asm volatile("" ::: "memory"); > + > + reg[0] = mfspr(SPRN_TAR); > + reg[1] = mfspr(SPRN_PPR); > + reg[2] = mfspr(SPRN_DSCR); > + > + printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n", > + user_read, reg[0], reg[1], reg[2]); > + > + /* Unblock the parent now */ > + cptr[1] = 1; > + shmdt((int *)cptr); > + > + ret = validate_tar_registers(reg, TAR_2, PPR_2, DSCR_2); > + if (ret) > + exit(1); > + exit(0); > +} > + > +int trace_tar(pid_t child) > +{ > + unsigned long reg[3]; > + int ret; > + > + ret = start_trace(child); > + if (ret) > + return TEST_FAIL; > + > + ret = show_tar_registers(child, reg); > + if (ret) > + return TEST_FAIL; > + > + printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n", > + ptrace_read_running, reg[0], reg[1], > reg[2]); > + > + ret = validate_tar_registers(reg, TAR_1, PPR_1, DSCR_1); > + if (ret) > + return TEST_FAIL; > + > + ret = stop_trace(child); > + if (ret) > + return TEST_FAIL; > + > + return TEST_PASS; > +} > + > +int trace_tar_write(pid_t child) > +{ > + int ret; > + > + ret = start_trace(child); > + if (ret) > + return TEST_FAIL; > + > + ret = write_tar_registers(child, TAR_2, PPR_2, DSCR_2); > + if (ret) > + return TEST_FAIL; > + > + printf("%-30s TAR: %u PPR: %lx DSCR: %u\n", > + ptrace_write_running, TAR_2, PPR_2, DSCR_2); > + > + ret = stop_trace(child); > + if (ret) > + return TEST_FAIL; > + > + return TEST_PASS; > +} More comments about calling TEST_FAIL(x) > + > +int ptrace_tar(void) > +{ > + pid_t pid; > + int ret, status; > + > + shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, > 0777|IPC_CREAT); > + pid = fork(); > + if (pid < 0) { > + perror("fork() failed"); > + return TEST_FAIL; > + } > + > + if (pid == 0) > + tar(); > + > + if (pid) { > [snip]
Re: [PATCH v14 15/15] selftests/powerpc: Fix a build issue
On Mon, 2016-09-12 at 15:33 +0800, wei.guo.si...@gmail.com wrote: > From: Anshuman Khandual > > Fixes the following build failure - > > cp_abort.c:90:3: error: ‘for’ loop initial declarations are only > allowed in C99 or C11 mode > for (int i = 0; i < NUM_LOOPS; i++) { > ^ > cp_abort.c:90:3: note: use option -std=c99, -std=gnu99, -std=c11 or > -std=gnu11 to compile your code > cp_abort.c:97:3: error: ‘for’ loop initial declarations are only > allowed in C99 or C11 mode > for (int i = 0; i < NUM_LOOPS; i++) { > > Signed-off-by: Anshuman Khandual > Signed-off-by: Simon Guo Reviewed-by: Cyril Bur > --- > tools/testing/selftests/powerpc/context_switch/cp_abort.c | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git > a/tools/testing/selftests/powerpc/context_switch/cp_abort.c > b/tools/testing/selftests/powerpc/context_switch/cp_abort.c > index 5a5b55a..1ce7dce 100644 > --- a/tools/testing/selftests/powerpc/context_switch/cp_abort.c > +++ b/tools/testing/selftests/powerpc/context_switch/cp_abort.c > @@ -67,7 +67,7 @@ int test_cp_abort(void) > /* 128 bytes for a full cache line */ > char buf[128] __cacheline_aligned; > cpu_set_t cpuset; > - int fd1[2], fd2[2], pid; > + int fd1[2], fd2[2], pid, i; > char c; > > /* only run this test on a P9 or later */ > @@ -87,14 +87,14 @@ int test_cp_abort(void) > FAIL_IF(pid < 0); > > if (!pid) { > - for (int i = 0; i < NUM_LOOPS; i++) { > + for (i = 0; i < NUM_LOOPS; i++) { > FAIL_IF((write(fd1[WRITE_FD], &c, 1)) != 1); > FAIL_IF((read(fd2[READ_FD], &c, 1)) != 1); > /* A paste succeeds if CR0 EQ bit is set */ > FAIL_IF(paste(buf) & 0x2000); > } > } else { > - for (int i = 0; i < NUM_LOOPS; i++) { > + for (i = 0; i < NUM_LOOPS; i++) { > FAIL_IF((read(fd1[READ_FD], &c, 1)) != 1); > copy(buf); > FAIL_IF((write(fd2[WRITE_FD], &c, 1) != 1));
Re: [PATCH v14 13/15] selftests/powerpc: Add ptrace tests for TM SPR registers
On Mon, 2016-09-12 at 15:33 +0800, wei.guo.si...@gmail.com wrote: > From: Anshuman Khandual > > This patch adds ptrace interface test for TM SPR registers. This > also adds ptrace interface based helper functions related to TM > SPR registers access. > I'm seeing this one fail a lot, it does occasionally succeed but fails a lot on my test setup. I use qemu on a power8 for most of my testing: qemu-system-ppc64 --enable-kvm -machine pseries,accel=kvm,usb=off -m 4096 -realtime mlock=off -smp 4,sockets=1,cores=2,threads=2 -nographic -vga none > Signed-off-by: Anshuman Khandual > Signed-off-by: Simon Guo > --- > tools/testing/selftests/powerpc/ptrace/Makefile| 3 +- > .../selftests/powerpc/ptrace/ptrace-tm-spr.c | 186 > + > tools/testing/selftests/powerpc/ptrace/ptrace.h| 35 > 3 files changed, 223 insertions(+), 1 deletion(-) > create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-tm- > spr.c > > diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile > b/tools/testing/selftests/powerpc/ptrace/Makefile > index 797840a..f34670e 100644 > --- a/tools/testing/selftests/powerpc/ptrace/Makefile > +++ b/tools/testing/selftests/powerpc/ptrace/Makefile > @@ -1,7 +1,8 @@ > TEST_PROGS := ptrace-ebb ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr > \ > ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx > \ > -ptrace-tm-spd-vsx > +ptrace-tm-spd-vsx ptrace-tm-spr > > +include ../../lib.mk > > all: $(TEST_PROGS) > CFLAGS += -m64 > diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c > b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c > new file mode 100644 > index 000..2863070 > --- /dev/null > +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c > @@ -0,0 +1,186 @@ > +/* > + * Ptrace test TM SPR registers > + * > + * Copyright (C) 2015 Anshuman Khandual, IBM Corporation. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + */ > +#include "ptrace.h" > + > +/* Tracee and tracer shared data */ > +struct shared { > + int flag; > + struct tm_spr_regs regs; > +}; > +unsigned long tfhar; > + > +int shm_id; > +volatile struct shared *cptr, *pptr; > + > +int shm_id1; > +volatile int *cptr1, *pptr1; > + > +#define TM_SCHED 0xde018c01 > +#define TM_KVM_SCHED 0xe001ac01 > + > +int validate_tm_spr(struct tm_spr_regs *regs) > +{ > + if (regs->tm_tfhar != tfhar) > + return TEST_FAIL; > + > + if ((regs->tm_texasr != TM_SCHED) && (regs->tm_texasr != > TM_KVM_SCHED)) > + return TEST_FAIL; The above condition fails, should this test try again if this condition is true, rather than fail? > + > + if ((regs->tm_texasr == TM_KVM_SCHED) && (regs->tm_tfiar != > 0)) > + return TEST_FAIL; > + > + return TEST_PASS; > +} > + [snip]
Re: [PATCH V4 2/2] powerpc/powernv : Add support for OPAL-OCC command/response interface
On Wed, 2017-06-21 at 13:36 +0530, Shilpasri G Bhat wrote: > In P9, OCC (On-Chip-Controller) supports shared memory based > commad-response interface. Within the shared memory there is an OPAL > command buffer and OCC response buffer that can be used to send > inband commands to OCC. This patch adds a platform driver to support > the command/response interface between OCC and the host. > Sorry I probably should have pointed out earlier that I don't really understand the first patch or exactly what problem you're trying to solve. I've left it ignored, feel free to explain what the idea is there or hopefully someone who can see what you're trying to do can step in. As for this patch, just one thing. > Signed-off-by: Shilpasri G Bhat > --- > - Hold occ->cmd_in_progress in read() > - Reset occ->rsp_consumed if copy_to_user() fails > > arch/powerpc/include/asm/opal-api.h| 41 +++- > arch/powerpc/include/asm/opal.h| 3 + > arch/powerpc/platforms/powernv/Makefile| 2 +- > arch/powerpc/platforms/powernv/opal-occ.c | 313 > + > arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + > arch/powerpc/platforms/powernv/opal.c | 8 + > 6 files changed, 366 insertions(+), 2 deletions(-) > create mode 100644 arch/powerpc/platforms/powernv/opal-occ.c > [snip] > + > +static ssize_t opal_occ_read(struct file *file, char __user *buf, > + size_t count, loff_t *ppos) > +{ > + struct miscdevice *dev = file->private_data; > + struct occ *occ = container_of(dev, struct occ, dev); > + int rc; > + > + if (count < sizeof(*occ->rsp) + occ->rsp->size) > + return -EINVAL; > + > + if (!atomic_cmpxchg(&occ->rsp_consumed, 1, 0)) > + return -EBUSY; > + > + if (atomic_cmpxchg(&occ->cmd_in_progress, 0, 1)) > + return -EBUSY; > + Personally I would have done these two checks the other way around, it doesn't really matter which one you do first but what does matter is that you undo the change you did in the first cmpxchg if the second cmpxchg causes you do return. In this case if cmd_in_progress then you'll have marked the response as consumed... > + rc = copy_to_user((void __user *)buf, occ->rsp, > + sizeof(occ->rsp) + occ->rsp->size); > + if (rc) { > + atomic_set(&occ->rsp_consumed, 1); > + atomic_set(&occ->cmd_in_progress, 0); > + pr_err("Failed to copy OCC response data to user\n"); > + return rc; > + } > + > + atomic_set(&occ->cmd_in_progress, 0); > + return sizeof(*occ->rsp) + occ->rsp->size; > +} > + [snip]
Re: [PATCH V4 2/2] powerpc/powernv : Add support for OPAL-OCC command/response interface
On Thu, 2017-06-22 at 09:57 +0530, Shilpasri G Bhat wrote: > Hi Cyril, > > On 06/22/2017 06:28 AM, Cyril Bur wrote: > > On Wed, 2017-06-21 at 13:36 +0530, Shilpasri G Bhat wrote: > > > In P9, OCC (On-Chip-Controller) supports shared memory based > > > commad-response interface. Within the shared memory there is an OPAL > > > command buffer and OCC response buffer that can be used to send > > > inband commands to OCC. This patch adds a platform driver to support > > > the command/response interface between OCC and the host. > > > > > > > Sorry I probably should have pointed out earlier that I don't really > > understand the first patch or exactly what problem you're trying to > > solve. I've left it ignored, feel free to explain what the idea is > > there or hopefully someone who can see what you're trying to do can > > step in. > > Thanks for reviewing this patch. > > For the first patch however, OCC expects a different request_id in the command > interface every time OPAL is requesting a new command . > 'opal_async_get_token_interruptible()' returns a free token from the > 'opal_async_complete_map' which does not work for the above OCC requirement as > we may end up getting the same token. Thus the first patch tries to get a new > token excluding a token that was used for the last command. > Oh yes I see because you're using the OPAL token to the opal call as the OCC msg request_id. Is there any requirement that these two numbers match? Is the 'different' a per OCC or a global 'all OCC' requirement? Couldn't you just have a counter in your occ struct in this patch that you increment on each call? I've looked through your skiboot patch and it looks like you use the msg->request_id so that you know who to respond to - wouldn't be easier to just pass the opal token through as an extra parameter to OPAL_OCC_COMMAND and store it alongside the msg structure? Apologies I shouldn't have overlooked this for so long. > > > > > As for this patch, just one thing. > > > > > > > Signed-off-by: Shilpasri G Bhat > > > --- > > > - Hold occ->cmd_in_progress in read() > > > - Reset occ->rsp_consumed if copy_to_user() fails > > > > > > arch/powerpc/include/asm/opal-api.h| 41 +++- > > > arch/powerpc/include/asm/opal.h| 3 + > > > arch/powerpc/platforms/powernv/Makefile| 2 +- > > > arch/powerpc/platforms/powernv/opal-occ.c | 313 > > > + > > > arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + > > > arch/powerpc/platforms/powernv/opal.c | 8 + > > > 6 files changed, 366 insertions(+), 2 deletions(-) > > > create mode 100644 arch/powerpc/platforms/powernv/opal-occ.c > > > > > > > [snip] > > > > > + > > > +static ssize_t opal_occ_read(struct file *file, char __user *buf, > > > + size_t count, loff_t *ppos) > > > +{ > > > + struct miscdevice *dev = file->private_data; > > > + struct occ *occ = container_of(dev, struct occ, dev); > > > + int rc; > > > + > > > + if (count < sizeof(*occ->rsp) + occ->rsp->size) > > > + return -EINVAL; > > > + > > > + if (!atomic_cmpxchg(&occ->rsp_consumed, 1, 0)) > > > + return -EBUSY; > > > + > > > + if (atomic_cmpxchg(&occ->cmd_in_progress, 0, 1)) > > > + return -EBUSY; > > > + > > > > Personally I would have done these two checks the other way around, it > > doesn't really matter which one you do first but what does matter is > > that you undo the change you did in the first cmpxchg if the second > > cmpxchg causes you do return. > > > > In this case if cmd_in_progress then you'll have marked the response as > > consumed... > > Here, if cmd_in_progress is set by some other thread doing a write() then it > will set the 'rsp_consumed' to valid on successful command completion. If > write() fails then we are doing a good thing here by not setting > 'rsp_consumed' > so the user will not be able to read previous command's response. > Yep, if you're happy I'm happy, I suppose there is a scenario where you might lose a response. read() and sees that there is a response, and marks it as consumed, meanwhile a write grabs cmd_in_progress which causes the read()er to return EBUSY but something fails before the write()er can actually send an occ command
Re: [PATCH 0/2] Quieten softlockup detector on virtualised kernels
On Mon, 2015-01-05 at 11:50 -0500, Don Zickus wrote: > cc'ing Marcelo > > On Mon, Dec 22, 2014 at 04:06:02PM +1100, Cyril Bur wrote: > > When the hypervisor pauses a virtualised kernel the kernel will observe a > > jump > > in timebase, this can cause spurious messages from the softlockup detector. > > > > Whilst these messages are harmless, they are accompanied with a stack trace > > which causes undue concern and more problematically the stack trace in the > > guest has nothing to do with the observed problem and can only be > > misleading. > > > > Futhermore, on POWER8 this is completely avoidable with the introduction of > > the Virtual Time Base (VTB) register. > > Hi Cyril, > > Your solution seems simple and doesn't disturb the softlockup code as much > as the x86 solution does. The only small issue I had was the use of > sched_clock instead of local_clock. I keep forgetting the difference > (unstable clock is the biggest reason I think). My apologies there it appears I stuffed up, local_clock was used initially in the softlockup code, I'll send a v2. > Other than that, I am not the biggest fan of putting multiple virtual > guest solutions for the same problem into the watchdog code. I would > prefer a common solution/framework to leverage. Agreed. > I have the x86 folks focusing on the steal_time stuff. It started with > KVM and I believe VMWare is working on utilizing it too (and maybe Xen). I'm not sure I've ever seen this, could you please point me towards something I can look at? > Not sure if that is useful or could be incoporated into the power8 code. > Though to be honest I am curious if the steal_time code could be ported to > your solution as it seems the watchdog code could remove all the > steal_time warts. Happy to help sus out the situation here, again, if you could pass on what the x86 guys are working on, thanks. Thanks, Cyril > I have cc'd Marcelo into this discussion as he was the last person I > remember talking with about this problem. > > Cheers, > Don -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/2] powerpc: add running_clock for powerpc to prevent spurious softlockup warnings
cc'd Martin Schwidefsky and Heiko Carstens. On Mon, 2015-01-05 at 14:10 -0800, Andrew Morton wrote: > On Mon, 22 Dec 2014 16:06:04 +1100 Cyril Bur wrote: > > > On POWER8 virtualised kernels the VTB register can be read to have a view of > > time that only increases while the guest is running. This will prevent > > guests > > from seeing time jump if a guest is paused for significant amounts of time. > > > > On POWER7 and below virtualised kernels stolen time is subtracted from > > sched_clock as a best effort approximation. This will not eliminate spurious > > warnings in the case of a suspended guest but may reduce the occurance in > > the > > case of softlockups due to host over commit. > > > > Bare metal kernels should avoid reading the VTB as KVM does not restore sane > > values when not executing. sched_clock is returned in this case. > > > > --- a/arch/powerpc/kernel/time.c > > +++ b/arch/powerpc/kernel/time.c > > @@ -621,6 +621,30 @@ unsigned long long sched_clock(void) > > return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift; > > } > > > > +unsigned long long running_clock(void) > > Non-kvm kernels don't need this code. Is there some appropriate > "#ifdef CONFIG_foo" we can wrap this in? CONFIG_PSERIES would work, having said that typical compilation for a powernv kernel almost always includes CONFIG_PSERIES (although it doesn't need to)... still, your point is valid, will add in v2. > > > > +{ > > + /* > > +* Don't read the VTB as a host since KVM does not switch in host > > timebase > > +* into the VTB when it takes a guest off the CPU, reading the VTB would > > +* result in reading 'last switched out' guest VTB. > > +*/ > > + > > + if (firmware_has_feature(FW_FEATURE_LPAR)) { > > + if (cpu_has_feature(CPU_FTR_ARCH_207S)) > > + return mulhdu(get_vtb() - boot_tb, tb_to_ns_scale) << > > tb_to_ns_shift; > > + > > + /* This is a next best approximation without a VTB. */ > > + return sched_clock() - > > cputime_to_nsecs(kcpustat_this_cpu->cpustat[CPUTIME_STEAL]); > > Why is this result dependent on FW_FEATURE_LPAR? It's all generic code. Good point, the reason it ended up there is because I wanted to avoid behaviour changes. > > In fact the kernel/sched/clock.c default implementation of > running_clock() could use this expression. Would that be good or bad? :) For power I'm almost certain it would be fine, on platforms which don't do stolen time cpustat[CPUTIME_STEAL] should always be zero and if not then the value should always be sane (although as mentioned in the comment, not as accurate as using the VTB). Putting it in the default implementation could cause behavioural changes for x86 and s390, I would want their views on doing that. > > > + } > > + > > + /* > > +* On a host which doesn't do any virtualisation TB *should* equal VTB > > so > > +* it makes no difference anyway. > > +*/ > > + > > + return sched_clock(); > > +} > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/2] Quieten softlockup detector on virtualised kernels
On Mon, 2015-01-05 at 14:09 -0800, Andrew Morton wrote: > On Mon, 22 Dec 2014 16:06:02 +1100 Cyril Bur wrote: > > > When the hypervisor pauses a virtualised kernel the kernel will observe a > > jump > > in timebase, this can cause spurious messages from the softlockup detector. > > > > Whilst these messages are harmless, they are accompanied with a stack trace > > which causes undue concern and more problematically the stack trace in the > > guest has nothing to do with the observed problem and can only be > > misleading. > > > > Futhermore, on POWER8 this is completely avoidable with the introduction of > > the Virtual Time Base (VTB) register. > > Does this problem apply to other KVM implementations and to Xen? If > so, what would implementations of running_clock() for those look like? > If not, why not? Yes the problem should appear on other KVM implementations, not really sure about Xen but I don't see why the problem wouldn't crop up. x86 do have a method for dealing with it in the softlockup detector, they've added a check in the softlockup using a paravirtualised clock where the guest can discover if it had been paused, Xen could be using too. It doesn't appear s390 do anything. Thanks, Cyril > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/2] Quieten softlockup detector on virtualised kernels
On Tue, 2015-01-06 at 10:01 -0500, Don Zickus wrote: > On Tue, Jan 06, 2015 at 10:53:35AM +1100, Cyril Bur wrote: > > On Mon, 2015-01-05 at 11:50 -0500, Don Zickus wrote: > > > cc'ing Marcelo > > > > > > On Mon, Dec 22, 2014 at 04:06:02PM +1100, Cyril Bur wrote: > > > > When the hypervisor pauses a virtualised kernel the kernel will observe > > > > a jump > > > > in timebase, this can cause spurious messages from the softlockup > > > > detector. > > > > > > > > Whilst these messages are harmless, they are accompanied with a stack > > > > trace > > > > which causes undue concern and more problematically the stack trace in > > > > the > > > > guest has nothing to do with the observed problem and can only be > > > > misleading. > > > > > > > > Futhermore, on POWER8 this is completely avoidable with the > > > > introduction of > > > > the Virtual Time Base (VTB) register. > > > > > > Hi Cyril, > > > > > > Your solution seems simple and doesn't disturb the softlockup code as much > > > as the x86 solution does. The only small issue I had was the use of > > > sched_clock instead of local_clock. I keep forgetting the difference > > > (unstable clock is the biggest reason I think). > > My apologies there it appears I stuffed up, local_clock was used > > initially in the softlockup code, I'll send a v2. > > Thanks! > > > > > > Other than that, I am not the biggest fan of putting multiple virtual > > > guest solutions for the same problem into the watchdog code. I would > > > prefer a common solution/framework to leverage. > > Agreed. > > > > > I have the x86 folks focusing on the steal_time stuff. It started with > > > KVM and I believe VMWare is working on utilizing it too (and maybe Xen). > > I'm not sure I've ever seen this, could you please point me towards > > something I can look at? > > I am not too familar with it, but the kernel/watchdog.c code has calls to > kvm_check_and_clear_guest_paused(), which is probably a good place to > start. > Ah yes that, I did initially have a look at what it does when I undertook to solve the problem on power and I suppose the two solutions are similar in that they both just use a virtualised time source. The similarities stop there though, the paravirtualised clock that x86 uses provides (as the name of the function implies) a 'was paused' flag. Obviously the flag isn't something the vtb register on power8 can provide and since we have a vtb, its preferable to use that. Perhaps x86 can do something with running_clock? Regards, Cyril > Cheers, > Don > > > > > > Not sure if that is useful or could be incoporated into the power8 code. > > > Though to be honest I am curious if the steal_time code could be ported to > > > your solution as it seems the watchdog code could remove all the > > > steal_time warts. > > Happy to help sus out the situation here, again, if you could pass on > > what the x86 guys are working on, thanks. > > > > > > Thanks, > > > > Cyril > > > I have cc'd Marcelo into this discussion as he was the last person I > > > remember talking with about this problem. > > > > > > Cheers, > > > Don > > > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/2] powerpc: add running_clock for powerpc to prevent spurious softlockup warnings
On Wed, 2015-01-07 at 11:20 +0100, Martin Schwidefsky wrote: > On Tue, 06 Jan 2015 13:44:01 +1100 > Cyril Bur wrote: > > > On Mon, 2015-01-05 at 14:10 -0800, Andrew Morton wrote: > > > On Mon, 22 Dec 2014 16:06:04 +1100 Cyril Bur wrote: > > > > > > > On POWER8 virtualised kernels the VTB register can be read to have a > > > > view of > > > > time that only increases while the guest is running. This will prevent > > > > guests > > > > from seeing time jump if a guest is paused for significant amounts of > > > > time. > > > > > > > > On POWER7 and below virtualised kernels stolen time is subtracted from > > > > sched_clock as a best effort approximation. This will not eliminate > > > > spurious > > > > warnings in the case of a suspended guest but may reduce the occurance > > > > in the > > > > case of softlockups due to host over commit. > > > > > > > > Bare metal kernels should avoid reading the VTB as KVM does not restore > > > > sane > > > > values when not executing. sched_clock is returned in this case. > > > > > > > > --- a/arch/powerpc/kernel/time.c > > > > +++ b/arch/powerpc/kernel/time.c > > > > @@ -621,6 +621,30 @@ unsigned long long sched_clock(void) > > > > return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << > > > > tb_to_ns_shift; > > > > } > > > > > > > > +unsigned long long running_clock(void) > > > > > > Non-kvm kernels don't need this code. Is there some appropriate > > > "#ifdef CONFIG_foo" we can wrap this in? > > CONFIG_PSERIES would work, having said that typical compilation for a > > powernv kernel almost always includes CONFIG_PSERIES (although it > > doesn't need to)... still, your point is valid, will add in v2. > > > > > > > > > > +{ > > > > + /* > > > > +* Don't read the VTB as a host since KVM does not switch in > > > > host timebase > > > > +* into the VTB when it takes a guest off the CPU, reading the > > > > VTB would > > > > +* result in reading 'last switched out' guest VTB. > > > > +*/ > > > > + > > > > + if (firmware_has_feature(FW_FEATURE_LPAR)) { > > > > + if (cpu_has_feature(CPU_FTR_ARCH_207S)) > > > > + return mulhdu(get_vtb() - boot_tb, > > > > tb_to_ns_scale) << tb_to_ns_shift; > > > > + > > > > + /* This is a next best approximation without a VTB. */ > > > > + return sched_clock() - > > > > cputime_to_nsecs(kcpustat_this_cpu->cpustat[CPUTIME_STEAL]); > > > > > > Why is this result dependent on FW_FEATURE_LPAR? It's all generic code. > > Good point, the reason it ended up there is because I wanted to avoid > > behaviour changes. > > > > > > In fact the kernel/sched/clock.c default implementation of > > > running_clock() could use this expression. Would that be good or bad? :) > > For power I'm almost certain it would be fine, on platforms which don't > > do stolen time cpustat[CPUTIME_STEAL] should always be zero and if not > > then the value should always be sane (although as mentioned in the > > comment, not as accurate as using the VTB). > > > > Putting it in the default implementation could cause behavioural changes > > for x86 and s390, I would want their views on doing that. > > I would prefer to make sched_clock do all the work. We have been thinking > about steal time vs sched_clock as well, our solution would be to exchange > the time source. Right now sched_clock is based on the TOD clock, the code > that takes steal time into account would use the CPU timer instead. > With the subtraction of kcpustat_this_cpu->cpustat[CPUTIME_STEAL] in > common code we would have to add the same value in the sched_clock > implementation as the steal time is already included in the CPU timer > deltas. Thanks for the quick reply Martin, Sound like you've got ideas and while I didn't really grasp all of that, I gather we best leave the common code as is. > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 0/2] Quieten softlockup detector on virtualised kernels
When the hypervisor pauses a virtualised kernel the kernel will observe a jump in timebase, this can cause spurious messages from the softlockup detector. Whilst these messages are harmless, they are accompanied with a stack trace which causes undue concern and more problematically the stack trace in the guest has nothing to do with the observed problem and can only be misleading. Futhermore, on POWER8 this is completely avoidable with the introduction of the Virtual Time Base (VTB) register. V2: Remove the export of running_clock Added #ifdef CONFIG_PPC_PSERIES and optimised the non lpar + vtb cases. Replaced the use of sched_clock_with local_clock it was used originally in the softlockup detector. Cyril Bur (2): Add another clock for use with the soft lockup watchdog. powerpc: add running_clock for powerpc to prevent spurious softlockup warnings arch/powerpc/kernel/time.c | 32 include/linux/sched.h | 1 + kernel/sched/clock.c | 13 + kernel/watchdog.c | 2 +- 4 files changed, 47 insertions(+), 1 deletion(-) -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 2/2] powerpc: add running_clock for powerpc to prevent spurious softlockup warnings
On POWER8 virtualised kernels the VTB register can be read to have a view of time that only increases while the guest is running. This will prevent guests from seeing time jump if a guest is paused for significant amounts of time. On POWER7 and below virtualised kernels stolen time is subtracted from local_clock as a best effort approximation. This will not eliminate spurious warnings in the case of a suspended guest but may reduce the occurance in the case of softlockups due to host over commit. Bare metal kernels should avoid reading the VTB as KVM does not restore sane values when not executing, the approxmation is fine as host kernels won't observe any stolen time. Signed-off-by: Cyril Bur --- V2: Replaced the use of sched_clock_with local_clock it was used originally in the softlockup detector. Added #ifdef CONFIG_PPC_PSERIES and optimised the non lpar + vtb cases. --- arch/powerpc/kernel/time.c | 32 1 file changed, 32 insertions(+) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index fa7c4f1..fd35e5b 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -621,6 +621,38 @@ unsigned long long sched_clock(void) return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift; } + +#ifdef CONFIG_PPC_PSERIES + +/* + * Running clock - attempts to give a view of time passing for a virtualised + * kernels. + * Uses the VTB register if available otherwise a next best guess. + */ +unsigned long long running_clock(void) +{ + /* +* Don't read the VTB as a host since KVM does not switch in host timebase +* into the VTB when it takes a guest off the CPU, reading the VTB would +* result in reading 'last switched out' guest VTB. +* +* Host kernels are often compiled with CONFIG_PSERIES checked, it would be +* unsafe to rely only on the #ifdef above. +*/ + if (firmware_has_feature(FW_FEATURE_LPAR) && + cpu_has_feature(CPU_FTR_ARCH_207S)) + return mulhdu(get_vtb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift; + + /* +* This is a next best approximation without a VTB. +* On a host which is running bare metal there should never be any stolen +* time and on a host which doesn't do any virtualisation TB *should* equal +* VTB so it makes no difference anyway. +*/ + return local_clock() - cputime_to_nsecs(kcpustat_this_cpu->cpustat[CPUTIME_STEAL]); +} +#endif + static int __init get_freq(char *name, int cells, unsigned long *val) { struct device_node *cpu; -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 1/2] Add another clock for use with the soft lockup watchdog.
This permits the use of arch specific clocks for which virtualised kernels can use their notion of 'running' time, not the elpased wall time which will include host execution time. Signed-off-by: Cyril Bur --- V2: Remove the export of running_clock Use local_clock instead of sched_clock as was initally used in the softlockup detector --- include/linux/sched.h | 1 + kernel/sched/clock.c | 13 + kernel/watchdog.c | 2 +- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 8db31ef..e400162 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2145,6 +2145,7 @@ extern unsigned long long notrace sched_clock(void); */ extern u64 cpu_clock(int cpu); extern u64 local_clock(void); +extern u64 running_clock(void); extern u64 sched_clock_cpu(int cpu); diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index c27e4f8..c0a2051 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -420,3 +420,16 @@ u64 local_clock(void) EXPORT_SYMBOL_GPL(cpu_clock); EXPORT_SYMBOL_GPL(local_clock); + +/* + * Running clock - returns the time that has elapsed while a guest has been + * running. + * On a guest this value should be local_clock minus the time the guest was + * suspended by the hypervisor (for any reason). + * On bare metal this function should return the same as local_clock. + * Architectures and sub-architectures can override this. + */ +u64 __weak running_clock(void) +{ + return local_clock(); +} diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 70bf118..3174bf8 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -154,7 +154,7 @@ static int get_softlockup_thresh(void) */ static unsigned long get_timestamp(void) { - return local_clock() >> 30LL; /* 2^30 ~= 10^9 */ + return running_clock() >> 30LL; /* 2^30 ~= 10^9 */ } static void set_sample_period(void) -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/2] Add another clock for use with the soft lockup watchdog.
This permits the use of arch specific clocks for which virtualised kernels can use their notion of 'running' time, not the elpased wall time which will include host execution time. Signed-off-by: Cyril Bur --- include/linux/sched.h | 1 + kernel/sched/clock.c | 14 ++ kernel/watchdog.c | 2 +- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 5e344bb..12eb1d0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2101,6 +2101,7 @@ extern unsigned long long notrace sched_clock(void); */ extern u64 cpu_clock(int cpu); extern u64 local_clock(void); +extern u64 running_clock(void); extern u64 sched_clock_cpu(int cpu); diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index c27e4f8..c83af4f 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -74,6 +74,20 @@ unsigned long long __weak sched_clock(void) } EXPORT_SYMBOL_GPL(sched_clock); +/* + * Running clock - returns the time that has elapsed while a guest has been + * running. + * On a guest this value should be sched_clock minus the time the + * guest was suspended by the hypervisor (for any reason). + * On bare metal this function should return the same as sched_clock. + * Architectures and sub-architectures can override this. + */ +unsigned long long __weak running_clock(void) +{ + return sched_clock(); +} +EXPORT_SYMBOL_GPL(running_clock); + __read_mostly int sched_clock_running; #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 70bf118..3174bf8 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -154,7 +154,7 @@ static int get_softlockup_thresh(void) */ static unsigned long get_timestamp(void) { - return local_clock() >> 30LL; /* 2^30 ~= 10^9 */ + return running_clock() >> 30LL; /* 2^30 ~= 10^9 */ } static void set_sample_period(void) -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/2] powerpc: add running_clock for powerpc to prevent spurious softlockup warnings
On POWER8 virtualised kernels the VTB register can be read to have a view of time that only increases while the guest is running. This will prevent guests from seeing time jump if a guest is paused for significant amounts of time. On POWER7 and below virtualised kernels stolen time is subtracted from sched_clock as a best effort approximation. This will not eliminate spurious warnings in the case of a suspended guest but may reduce the occurance in the case of softlockups due to host over commit. Bare metal kernels should avoid reading the VTB as KVM does not restore sane values when not executing. sched_clock is returned in this case. Signed-off-by: Cyril Bur --- arch/powerpc/kernel/time.c | 24 1 file changed, 24 insertions(+) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 7505599..839aeae 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -621,6 +621,30 @@ unsigned long long sched_clock(void) return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift; } +unsigned long long running_clock(void) +{ + /* +* Don't read the VTB as a host since KVM does not switch in host timebase +* into the VTB when it takes a guest off the CPU, reading the VTB would +* result in reading 'last switched out' guest VTB. +*/ + + if (firmware_has_feature(FW_FEATURE_LPAR)) { + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return mulhdu(get_vtb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift; + + /* This is a next best approximation without a VTB. */ + return sched_clock() - cputime_to_nsecs(kcpustat_this_cpu->cpustat[CPUTIME_STEAL]); + } + + /* +* On a host which doesn't do any virtualisation TB *should* equal VTB so +* it makes no difference anyway. +*/ + + return sched_clock(); +} + static int __init get_freq(char *name, int cells, unsigned long *val) { struct device_node *cpu; -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 0/2] Quieten softlockup detector on virtualised kernels
When the hypervisor pauses a virtualised kernel the kernel will observe a jump in timebase, this can cause spurious messages from the softlockup detector. Whilst these messages are harmless, they are accompanied with a stack trace which causes undue concern and more problematically the stack trace in the guest has nothing to do with the observed problem and can only be misleading. Futhermore, on POWER8 this is completely avoidable with the introduction of the Virtual Time Base (VTB) register. Cyril Bur (2): Add another clock for use with the soft lockup watchdog. powerpc: add running_clock for powerpc to prevent spurious softlockup warnings arch/powerpc/kernel/time.c | 24 include/linux/sched.h | 1 + kernel/sched/clock.c | 14 ++ kernel/watchdog.c | 2 +- 4 files changed, 40 insertions(+), 1 deletion(-) -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 0/2] Quieten softlockup detector on virtualised kernels
When the hypervisor pauses a virtualised kernel the kernel will observe a jump in timebase, this can cause spurious messages from the softlockup detector. Whilst these messages are harmless, they are accompanied with a stack trace which causes undue concern and more problematically the stack trace in the guest has nothing to do with the observed problem and can only be misleading. Futhermore, on POWER8 this is completely avoidable with the introduction of the Virtual Time Base (VTB) register. Cyril Bur (2): Add another clock for use with the soft lockup watchdog. powerpc: add running_clock for powerpc to prevent spurious softlockup warnings arch/powerpc/kernel/time.c | 24 include/linux/sched.h | 1 + kernel/sched/clock.c | 14 ++ kernel/watchdog.c | 2 +- 4 files changed, 40 insertions(+), 1 deletion(-) -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/2] powerpc: add running_clock for powerpc to prevent spurious softlockup warnings
On POWER8 virtualised kernels the VTB register can be read to have a view of time that only increases while the guest is running. This will prevent guests from seeing time jump if a guest is paused for significant amounts of time. On POWER7 and below virtualised kernels stolen time is subtracted from sched_clock as a best effort approximation. This will not eliminate spurious warnings in the case of a suspended guest but may reduce the occurance in the case of softlockups due to host over commit. Bare metal kernels should avoid reading the VTB as KVM does not restore sane values when not executing. sched_clock is returned in this case. Signed-off-by: Cyril Bur --- arch/powerpc/kernel/time.c | 24 1 file changed, 24 insertions(+) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index fa7c4f1..9ba13ec 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -621,6 +621,30 @@ unsigned long long sched_clock(void) return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift; } +unsigned long long running_clock(void) +{ + /* +* Don't read the VTB as a host since KVM does not switch in host timebase +* into the VTB when it takes a guest off the CPU, reading the VTB would +* result in reading 'last switched out' guest VTB. +*/ + + if (firmware_has_feature(FW_FEATURE_LPAR)) { + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return mulhdu(get_vtb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift; + + /* This is a next best approximation without a VTB. */ + return sched_clock() - cputime_to_nsecs(kcpustat_this_cpu->cpustat[CPUTIME_STEAL]); + } + + /* +* On a host which doesn't do any virtualisation TB *should* equal VTB so +* it makes no difference anyway. +*/ + + return sched_clock(); +} + static int __init get_freq(char *name, int cells, unsigned long *val) { struct device_node *cpu; -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/2] Add another clock for use with the soft lockup watchdog.
This permits the use of arch specific clocks for which virtualised kernels can use their notion of 'running' time, not the elpased wall time which will include host execution time. Signed-off-by: Cyril Bur --- include/linux/sched.h | 1 + kernel/sched/clock.c | 14 ++ kernel/watchdog.c | 2 +- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 8db31ef..e400162 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2145,6 +2145,7 @@ extern unsigned long long notrace sched_clock(void); */ extern u64 cpu_clock(int cpu); extern u64 local_clock(void); +extern u64 running_clock(void); extern u64 sched_clock_cpu(int cpu); diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index c27e4f8..c83af4f 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -74,6 +74,20 @@ unsigned long long __weak sched_clock(void) } EXPORT_SYMBOL_GPL(sched_clock); +/* + * Running clock - returns the time that has elapsed while a guest has been + * running. + * On a guest this value should be sched_clock minus the time the + * guest was suspended by the hypervisor (for any reason). + * On bare metal this function should return the same as sched_clock. + * Architectures and sub-architectures can override this. + */ +unsigned long long __weak running_clock(void) +{ + return sched_clock(); +} +EXPORT_SYMBOL_GPL(running_clock); + __read_mostly int sched_clock_running; #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 70bf118..3174bf8 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -154,7 +154,7 @@ static int get_softlockup_thresh(void) */ static unsigned long get_timestamp(void) { - return local_clock() >> 30LL; /* 2^30 ~= 10^9 */ + return running_clock() >> 30LL; /* 2^30 ~= 10^9 */ } static void set_sample_period(void) -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 2/2] powerpc: add running_clock for powerpc to prevent spurious softlockup warnings
On Wed, 2015-02-04 at 11:42 +0100, Paul Bolle wrote: > On Fri, 2015-01-09 at 14:34 +1100, Cyril Bur wrote: > > On POWER8 virtualised kernels the VTB register can be read to have a view of > > time that only increases while the guest is running. This will prevent > > guests > > from seeing time jump if a guest is paused for significant amounts of time. > > > > On POWER7 and below virtualised kernels stolen time is subtracted from > > local_clock as a best effort approximation. This will not eliminate spurious > > warnings in the case of a suspended guest but may reduce the occurance in > > the > > case of softlockups due to host over commit. > > > > Bare metal kernels should avoid reading the VTB as KVM does not restore sane > > values when not executing, the approxmation is fine as host kernels won't > > observe any stolen time. > > > > Signed-off-by: Cyril Bur > > --- > > V2: > >Replaced the use of sched_clock_with local_clock it was used originally > > in > > the softlockup detector. > >Added #ifdef CONFIG_PPC_PSERIES and optimised the non lpar + vtb cases. > > This became commit 3e5aba51e929 ("powerpc: add running_clock for powerpc > to prevent spurious softlockup warnings") in today's linux-next (ie, > next-20150204). I noticed because a script I use to check linux-next > spotted a trivial issues with it. > > > --- > > arch/powerpc/kernel/time.c | 32 > > 1 file changed, 32 insertions(+) > > > > diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c > > index fa7c4f1..fd35e5b 100644 > > --- a/arch/powerpc/kernel/time.c > > +++ b/arch/powerpc/kernel/time.c > > @@ -621,6 +621,38 @@ unsigned long long sched_clock(void) > > return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift; > > } > > > > + > > +#ifdef CONFIG_PPC_PSERIES > > + > > +/* > > + * Running clock - attempts to give a view of time passing for a > > virtualised > > + * kernels. > > + * Uses the VTB register if available otherwise a next best guess. > > + */ > > +unsigned long long running_clock(void) > > +{ > > + /* > > +* Don't read the VTB as a host since KVM does not switch in host > > timebase > > +* into the VTB when it takes a guest off the CPU, reading the VTB would > > +* result in reading 'last switched out' guest VTB. > > +* > > +* Host kernels are often compiled with CONFIG_PSERIES checked, it > > would be > > You obviously wanted to use CONFIG_PPC_PSERIES here. > Yep, sorry. > Should I submit a trivial patch to fix that typo or do you prefer to do > that yourself? > Indeed a trivial fixup, I've pasted the hunk below. Andrew are you ok to fold it in? Thanks, Cyril diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index fd35e5b..770dc16 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -636,8 +636,8 @@ unsigned long long running_clock(void) * into the VTB when it takes a guest off the CPU, reading the VTB would * result in reading 'last switched out' guest VTB. * -* Host kernels are often compiled with CONFIG_PSERIES checked, it would be -* unsafe to rely only on the #ifdef above. +* Host kernels are often compiled with CONFIG_PPC_PSERIES checked, it +* would be unsafe to rely only on the #ifdef above. */ if (firmware_has_feature(FW_FEATURE_LPAR) && cpu_has_feature(CPU_FTR_ARCH_207S)) > Thanks, > > > Paul Bolle > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 0/2] Quieten softlockup detector on virtualised kernels
Hi Andrew, Could you please pick these patches up through your tree? Thanks, Cyril On Fri, 2015-01-09 at 14:34 +1100, Cyril Bur wrote: > When the hypervisor pauses a virtualised kernel the kernel will observe a jump > in timebase, this can cause spurious messages from the softlockup detector. > > Whilst these messages are harmless, they are accompanied with a stack trace > which causes undue concern and more problematically the stack trace in the > guest has nothing to do with the observed problem and can only be misleading. > > Futhermore, on POWER8 this is completely avoidable with the introduction of > the Virtual Time Base (VTB) register. > > V2: > Remove the export of running_clock > Added #ifdef CONFIG_PPC_PSERIES and optimised the non lpar + vtb cases. > Replaced the use of sched_clock_with local_clock it was used originally > in > the softlockup detector. > > Cyril Bur (2): > Add another clock for use with the soft lockup watchdog. > powerpc: add running_clock for powerpc to prevent spurious softlockup > warnings > > arch/powerpc/kernel/time.c | 32 > include/linux/sched.h | 1 + > kernel/sched/clock.c | 13 + > kernel/watchdog.c | 2 +- > 4 files changed, 47 insertions(+), 1 deletion(-) > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] powerpc: process.c: fix Kconfig typo
On Wed, 2016-10-26 at 16:52 +1100, Michael Ellerman wrote: > Cyril Bur writes: > > > On Wed, 2016-10-05 at 07:57 +0200, Valentin Rothberg wrote: > > > s/ALIVEC/ALTIVEC/ > > > > > > > Oops, nice catch > > > > > Signed-off-by: Valentin Rothberg > > > > Reviewed-by: Cyril Bur > > How did we not notice? Sounds like we need a new selftest. > Indeed... Here is probably a good place to say why we didn't catch it and under what circumstances this will have a negative effect. If a thread performs and transaction with altivec and then gets preempted for whatever reason, this bug may cause the kernel to not reenable altivec when that thread runs again. This will result in an altivec unavailable fault, when these faults happen inside a user transaction the kernel has no choice but enable altivec and doom the transaction. The result is that transactions using altivec may get aborted more than they should. The difficulty in catching this with a selftest is my deliberate use of the word may above. Optimisations to avoid FPU/altivec/VSX faults mean that the kernel will always leave them on for 255 switches, this code prevents the kernel turning it off if it got to the 256th switch (and userspace was transactional)... Cyril > Looks like this should have: > > Fixes: dc16b553c949 ("powerpc: Always restore FPU/VEC/VSX if hardware > transactional memory in use") > > > And I guess I need to start running checkkconfigsymbols.py on every > commit. > > cheers
Re: [PATCH] powerpc: process.c: fix Kconfig typo
On Wed, 2016-10-05 at 07:57 +0200, Valentin Rothberg wrote: > s/ALIVEC/ALTIVEC/ > Oops, nice catch > Signed-off-by: Valentin Rothberg Reviewed-by: Cyril Bur > --- > arch/powerpc/kernel/process.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/arch/powerpc/kernel/process.c > b/arch/powerpc/kernel/process.c > index 9e7c10fe205f..ce6dc61b15b2 100644 > --- a/arch/powerpc/kernel/process.c > +++ b/arch/powerpc/kernel/process.c > @@ -1012,7 +1012,7 @@ void restore_tm_state(struct pt_regs *regs) > /* Ensure that restore_math() will restore */ > if (msr_diff & MSR_FP) > current->thread.load_fp = 1; > -#ifdef CONFIG_ALIVEC > +#ifdef CONFIG_ALTIVEC > if (cpu_has_feature(CPU_FTR_ALTIVEC) && msr_diff & MSR_VEC) > current->thread.load_vec = 1; > #endif
[PATCH] drivers/misc: Aspeed LPC control fix compile error and warning
pgprot_dmachoerent() is not defined on every architecture. Having COMPILE_TEST set for the driver causes it to be compiled on architectures which do not have pgprot_dmachoerent(): drivers/misc/aspeed-lpc-ctrl.c: In function 'aspeed_lpc_ctrl_mmap': drivers/misc/aspeed-lpc-ctrl.c:51:9: error: implicit declaration of function 'pgprot_dmacoherent' [-Werror=implicit-function-declaration] prot = pgprot_dmacoherent(prot); There are two possible solutions: 1. Remove COMPILE_TEST to ensure the driver is only compiled on ARM 2. Use pgprot_noncached() instead of pgprot_dmachoerent() The first option results in less compile testing of the LPC control driver which is undesirable. The second option uses a function that is declared on all architectures and therefore should always build. Currently there is no practical difference between pgprot_noncached() and pgprot_dmachoerent() for the aspeed chips that this driver is compatible with. The reason for pgprot_dmachoerent() was that there may be chips made at some point in the future that could include hardware that pgprot_dmachoerent() could optimise for. As none of this hardware has even been announced there isn't really a need for pgprot_dmachoerent(). Using pgprot_noncached() is completely correct and optimal for all existing hardware on which the LPC control driver will run. This commit also addresses that phys_addr_t should be printed using %pap rather than %x: In file included from include/linux/miscdevice.h:6:0, from drivers/misc/aspeed-lpc-ctrl.c:11: drivers/misc/aspeed-lpc-ctrl.c: In function 'aspeed_lpc_ctrl_probe': drivers/misc/aspeed-lpc-ctrl.c:232:17: warning: format '%x' expects argument of type 'unsigned int', but argument 3 has type 'phys_addr_t {aka long long unsigned int}' [-Wformat=] dev_info(dev, "Loaded at 0x%08x (0x%08x)\n", Signed-off-by: Cyril Bur --- drivers/misc/aspeed-lpc-ctrl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/misc/aspeed-lpc-ctrl.c b/drivers/misc/aspeed-lpc-ctrl.c index f6acbe1d9378..c654651a7b6d 100644 --- a/drivers/misc/aspeed-lpc-ctrl.c +++ b/drivers/misc/aspeed-lpc-ctrl.c @@ -48,7 +48,7 @@ static int aspeed_lpc_ctrl_mmap(struct file *file, struct vm_area_struct *vma) return -EINVAL; /* ast2400/2500 AHB accesses are not cache coherent */ - prot = pgprot_dmacoherent(prot); + prot = pgprot_noncached(prot); if (remap_pfn_range(vma, vma->vm_start, (lpc_ctrl->mem_base >> PAGE_SHIFT) + vma->vm_pgoff, @@ -229,8 +229,8 @@ static int aspeed_lpc_ctrl_probe(struct platform_device *pdev) if (rc) dev_err(dev, "Unable to register device\n"); else - dev_info(dev, "Loaded at 0x%08x (0x%08x)\n", - lpc_ctrl->mem_base, lpc_ctrl->mem_size); + dev_info(dev, "Loaded at %pap (0x%08x)\n", + &lpc_ctrl->mem_base, lpc_ctrl->mem_size); return rc; } -- 2.12.0
[PATCH] drivers/misc: Aspeed LPC control fix format string warning
resource_size_t is a derivative of phys_addr_t and should also be printed with %pap: drivers/misc/aspeed-lpc-ctrl.c:232:17: warning: format '%x' expects argument of type 'unsigned int', but argument 4 has type 'resource_size_t {aka long long unsigned int}' [-Wformat=] dev_info(dev, "Loaded at %pap (0x%08x)\n", Reported-by: Stephen Rothwell Signed-off-by: Cyril Bur --- drivers/misc/aspeed-lpc-ctrl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/aspeed-lpc-ctrl.c b/drivers/misc/aspeed-lpc-ctrl.c index c654651a7b6d..2f6bb2244be5 100644 --- a/drivers/misc/aspeed-lpc-ctrl.c +++ b/drivers/misc/aspeed-lpc-ctrl.c @@ -229,8 +229,8 @@ static int aspeed_lpc_ctrl_probe(struct platform_device *pdev) if (rc) dev_err(dev, "Unable to register device\n"); else - dev_info(dev, "Loaded at %pap (0x%08x)\n", - &lpc_ctrl->mem_base, lpc_ctrl->mem_size); + dev_info(dev, "Loaded at %pap (%pap)\n", + &lpc_ctrl->mem_base, &lpc_ctrl->mem_size); return rc; } -- 2.12.1
Re: linux-next: build failure after merge of the char-misc tree
On Tue, 2017-03-21 at 11:18 +1100, Benjamin Herrenschmidt wrote: > On Mon, 2017-03-20 at 13:23 +0100, Arnd Bergmann wrote: > > On Mon, Mar 20, 2017 at 3:44 AM, Stephen Rothwell > > wrote: > > > Hi all, > > > > > > After merging the char-misc tree, today's linux-next build (x86_64 > > > allmodconfig) failed like this: > > > > > > drivers/misc/aspeed-lpc-ctrl.c: In function 'aspeed_lpc_ctrl_mmap': > > > drivers/misc/aspeed-lpc-ctrl.c:51:9: error: implicit declaration of > > > function 'pgprot_dmacoherent' [-Werror=implicit-function-declaration] > > > prot = pgprot_dmacoherent(prot); > > > > A lot of other drivers (including /dev/mem) just use pgprot_noncached() or > > pgprot_writecombine(), which would make the code portable and might be > > what you want here as well. > > > > pgprot_dmacoherent() is meant specifically for mapping RAM that is used > > for DMA buffers that come from dma_alloc_coherent(), which doesn't seem > > to be the case here. > > > > What kind of address range is this really? > > It's a piece of RAM that we reserve via a reserved region, which will > be accessed by HW (sort-of-DMA, ie, the "host" system will access that > using FW cycles on the LPC bus which we map to that reserved region of > memory). > > Joel, Cyril, can you send a 1-liner patch to change that to > pgprot_noncached() ? > Sure. Just to be clear - we want to keep COMPILE_TEST in kconfig? Also I can't help but notice this: https://lists.ozlabs.org/pipermail/openbmc/2017-January/006219.html [v3 of the series] vs https://lists.ozlabs.org/pipermail/openbmc/2017-February/006462.html [v4 of the series] > Cheers, > Ben. > > > > drivers/misc/aspeed-lpc-ctrl.c:51:7: error: incompatible types when > > > assigning to type 'pgprot_t {aka struct pgprot}' from type 'int' > > > prot = pgprot_dmacoherent(prot); > > > ^ > > > In file included from include/linux/miscdevice.h:6:0, > > > from drivers/misc/aspeed-lpc-ctrl.c:11: > > > drivers/misc/aspeed-lpc-ctrl.c: In function 'aspeed_lpc_ctrl_probe': > > > drivers/misc/aspeed-lpc-ctrl.c:232:17: warning: format '%x' expects > > > argument of type 'unsigned int', but argument 3 has type 'phys_addr_t > > > {aka long long unsigned int}' [-Wformat=] > > > dev_info(dev, "Loaded at 0x%08x (0x%08x)\n", > > > > This should just use the "%pap" for printing a phys_addr_t, otherwise we > > get the same warning on ARM in some configurations. Thanks Arnd, I'll address that too. > > > > Arnd
Re: [PATCH v6] drivers/misc: Add Aspeed LPC control driver
On Wed, 2017-02-22 at 16:11 +1100, Suraj Jitindar Singh wrote: > On Fri, 2017-02-17 at 14:28 +1100, Cyril Bur wrote: > > I may be too late, but see below... > > In order to manage server systems, there is typically another > > processor > > known as a BMC (Baseboard Management Controller) which is responsible > > for powering the server and other various elements, sometimes fans, > > often the system flash. > > > > The Aspeed BMC family which is what is used on OpenPOWER machines and > > a > > number of x86 as well is typically connected to the host via an LPC > > (Low Pin Count) bus (among others). > > > > The LPC bus is an ISA bus on steroids. It's generally used by the > > BMC chip to provide the host with access to the system flash (via > > MEM/FW > > cycles) that contains the BIOS or other host firmware along with a > > number of SuperIO-style IOs (via IO space) such as UARTs, IPMI > > controllers. > > > > On the BMC chip side, this is all configured via a bunch of registers > > whose content is related to a given policy of what devices are > > exposed > > at a per system level, which is system/vendor specific, so we don't > > want > > to bolt that into the BMC kernel. This started with a need to provide > > something nicer than /dev/mem for user space to configure these > > things. > > > > One important aspect of the configuration is how the MEM/FW space is > > exposed to the host (ie, the x86 or POWER). Some registers in that > > bridge can define a window remapping all or portion of the LPC MEM/FW > > space to a portion of the BMC internal bus, with no specific limits > > imposed in HW. > > > > I think it makes sense to ensure that this window is configured by a > > kernel driver that can apply some serious sanity checks on what it is > > configured to map. > > > > In practice, user space wants to control this by flipping the mapping > > between essentially two types of portions of the BMC address space: > > > > - The flash space. This is a region of the BMC MMIO space that > > more/less directly maps the system flash (at least for reads, writes > > are somewhat more complicated). > > > > - One (or more) reserved area(s) of the BMC physical memory. > > > > The latter is needed for a number of things, such as avoiding letting > > the host manipulate the innards of the BMC flash controller via some > > evil backdoor, we want to do flash updates by routing the window to a > > portion of memory (under control of a mailbox protocol via some > > separate set of registers) which the host can use to write new data > > in > > bulk and then request the BMC to flash it. There are other uses, such > > as allowing the host to boot from an in-memory flash image rather > > than > > the one in flash (very handy for continuous integration and test, the > > BMC can just download new images). > > > > It is important to note that due to the way the Aspeed chip lets the > > kernel configure the mapping between host LPC addresses and BMC ram > > addresses the offset within the window must be a multiple of size. > > Not doing so will fragment the accessible space rather than simply > > moving 'zero' upwards. This is caused by the nature of HICR8 being a > > mask and the way host LPC addresses are translated. > > > > Signed-off-by: Cyril Bur > > --- > > v2: > > Removed unused functions > > Removed use of access_ok() > > All input is evil > > Reworked the interface as per Benjamin Herrenschmidts vision > > v3: > > Removed 'default y' from Kconfig > > Reordered ioctl() struct fields > > Reworeded some comments > > v4: > > Reorder ioctl() struct fields (again) > > v5: > > Style cleanups > > Use of_address_to_resource() > > Document ioctl structure fields > > v6: > > Check and fail ioctl() if flags field non-zero > > > > drivers/misc/Kconfig | 8 ++ > > drivers/misc/Makefile| 1 + > > drivers/misc/aspeed-lpc-ctrl.c | 267 > > +++ > > include/uapi/linux/aspeed-lpc-ctrl.h | 60 > > 4 files changed, 336 insertions(+) > > create mode 100644 drivers/misc/aspeed-lpc-ctrl.c > > create mode 100644 include/uapi/linux/aspeed-lpc-ctrl.h > > > > diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig > > index 64971baf11fa..7dc4c369012f 100644 > > --- a/drivers/misc/Kco
[PATCH v5] drivers/misc: Add Aspeed LPC control driver
In order to manage server systems, there is typically another processor known as a BMC (Baseboard Management Controller) which is responsible for powering the server and other various elements, sometimes fans, often the system flash. The Aspeed BMC family which is what is used on OpenPOWER machines and a number of x86 as well is typically connected to the host via an LPC (Low Pin Count) bus (among others). The LPC bus is an ISA bus on steroids. It's generally used by the BMC chip to provide the host with access to the system flash (via MEM/FW cycles) that contains the BIOS or other host firmware along with a number of SuperIO-style IOs (via IO space) such as UARTs, IPMI controllers. On the BMC chip side, this is all configured via a bunch of registers whose content is related to a given policy of what devices are exposed at a per system level, which is system/vendor specific, so we don't want to bolt that into the BMC kernel. This started with a need to provide something nicer than /dev/mem for user space to configure these things. One important aspect of the configuration is how the MEM/FW space is exposed to the host (ie, the x86 or POWER). Some registers in that bridge can define a window remapping all or portion of the LPC MEM/FW space to a portion of the BMC internal bus, with no specific limits imposed in HW. I think it makes sense to ensure that this window is configured by a kernel driver that can apply some serious sanity checks on what it is configured to map. In practice, user space wants to control this by flipping the mapping between essentially two types of portions of the BMC address space: - The flash space. This is a region of the BMC MMIO space that more/less directly maps the system flash (at least for reads, writes are somewhat more complicated). - One (or more) reserved area(s) of the BMC physical memory. The latter is needed for a number of things, such as avoiding letting the host manipulate the innards of the BMC flash controller via some evil backdoor, we want to do flash updates by routing the window to a portion of memory (under control of a mailbox protocol via some separate set of registers) which the host can use to write new data in bulk and then request the BMC to flash it. There are other uses, such as allowing the host to boot from an in-memory flash image rather than the one in flash (very handy for continuous integration and test, the BMC can just download new images). It is important to note that due to the way the Aspeed chip lets the kernel configure the mapping between host LPC addresses and BMC ram addresses the offset within the window must be a multiple of size. Not doing so will fragment the accessible space rather than simply moving 'zero' upwards. This is caused by the nature of HICR8 being a mask and the way host LPC addresses are translated. Signed-off-by: Cyril Bur --- v2: Removed unused functions Removed use of access_ok() All input is evil Reworked the interface as per Benjamin Herrenschmidts vision v3: Removed 'default y' from Kconfig Reordered ioctl() struct fields Reworeded some comments v4: Reorder ioctl() struct fields (again) v5: Style cleanups Use of_address_to_resource() Document ioctl structure fields drivers/misc/Kconfig | 8 ++ drivers/misc/Makefile| 1 + drivers/misc/aspeed-lpc-ctrl.c | 264 +++ include/uapi/linux/aspeed-lpc-ctrl.h | 60 4 files changed, 333 insertions(+) create mode 100644 drivers/misc/aspeed-lpc-ctrl.c create mode 100644 include/uapi/linux/aspeed-lpc-ctrl.h diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 64971baf11fa..7dc4c369012f 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -766,6 +766,14 @@ config PANEL_BOOT_MESSAGE An empty message will only clear the display at driver init time. Any other printf()-formatted message is valid with newline and escape codes. +config ASPEED_LPC_CTRL + depends on (ARCH_ASPEED || COMPILE_TEST) && REGMAP && MFD_SYSCON + tristate "Aspeed ast2400/2500 HOST LPC to BMC bridge control" + ---help--- + Control Aspeed ast2400/2500 HOST LPC to BMC mappings through + ioctl()s, the driver also provides a read/write interface to a BMC ram + region where the host LPC read/write region can be buffered. + source "drivers/misc/c2port/Kconfig" source "drivers/misc/eeprom/Kconfig" source "drivers/misc/cb710/Kconfig" diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 31983366090a..de1925a9c80b 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -53,6 +53,7 @@ obj-$(CONFIG_ECHO)+= echo/ obj-$(CONFIG_VEXPRESS_SYSCFG) += vexpress-syscfg.o obj-$(CONFIG_CXL_BASE) += cxl/ obj-$(CONFIG_PANEL) += panel.o +obj-$(CONFIG_ASPEED_LP
[PATCH v6] drivers/misc: Add Aspeed LPC control driver
In order to manage server systems, there is typically another processor known as a BMC (Baseboard Management Controller) which is responsible for powering the server and other various elements, sometimes fans, often the system flash. The Aspeed BMC family which is what is used on OpenPOWER machines and a number of x86 as well is typically connected to the host via an LPC (Low Pin Count) bus (among others). The LPC bus is an ISA bus on steroids. It's generally used by the BMC chip to provide the host with access to the system flash (via MEM/FW cycles) that contains the BIOS or other host firmware along with a number of SuperIO-style IOs (via IO space) such as UARTs, IPMI controllers. On the BMC chip side, this is all configured via a bunch of registers whose content is related to a given policy of what devices are exposed at a per system level, which is system/vendor specific, so we don't want to bolt that into the BMC kernel. This started with a need to provide something nicer than /dev/mem for user space to configure these things. One important aspect of the configuration is how the MEM/FW space is exposed to the host (ie, the x86 or POWER). Some registers in that bridge can define a window remapping all or portion of the LPC MEM/FW space to a portion of the BMC internal bus, with no specific limits imposed in HW. I think it makes sense to ensure that this window is configured by a kernel driver that can apply some serious sanity checks on what it is configured to map. In practice, user space wants to control this by flipping the mapping between essentially two types of portions of the BMC address space: - The flash space. This is a region of the BMC MMIO space that more/less directly maps the system flash (at least for reads, writes are somewhat more complicated). - One (or more) reserved area(s) of the BMC physical memory. The latter is needed for a number of things, such as avoiding letting the host manipulate the innards of the BMC flash controller via some evil backdoor, we want to do flash updates by routing the window to a portion of memory (under control of a mailbox protocol via some separate set of registers) which the host can use to write new data in bulk and then request the BMC to flash it. There are other uses, such as allowing the host to boot from an in-memory flash image rather than the one in flash (very handy for continuous integration and test, the BMC can just download new images). It is important to note that due to the way the Aspeed chip lets the kernel configure the mapping between host LPC addresses and BMC ram addresses the offset within the window must be a multiple of size. Not doing so will fragment the accessible space rather than simply moving 'zero' upwards. This is caused by the nature of HICR8 being a mask and the way host LPC addresses are translated. Signed-off-by: Cyril Bur --- v2: Removed unused functions Removed use of access_ok() All input is evil Reworked the interface as per Benjamin Herrenschmidts vision v3: Removed 'default y' from Kconfig Reordered ioctl() struct fields Reworeded some comments v4: Reorder ioctl() struct fields (again) v5: Style cleanups Use of_address_to_resource() Document ioctl structure fields v6: Check and fail ioctl() if flags field non-zero drivers/misc/Kconfig | 8 ++ drivers/misc/Makefile| 1 + drivers/misc/aspeed-lpc-ctrl.c | 267 +++ include/uapi/linux/aspeed-lpc-ctrl.h | 60 4 files changed, 336 insertions(+) create mode 100644 drivers/misc/aspeed-lpc-ctrl.c create mode 100644 include/uapi/linux/aspeed-lpc-ctrl.h diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 64971baf11fa..7dc4c369012f 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -766,6 +766,14 @@ config PANEL_BOOT_MESSAGE An empty message will only clear the display at driver init time. Any other printf()-formatted message is valid with newline and escape codes. +config ASPEED_LPC_CTRL + depends on (ARCH_ASPEED || COMPILE_TEST) && REGMAP && MFD_SYSCON + tristate "Aspeed ast2400/2500 HOST LPC to BMC bridge control" + ---help--- + Control Aspeed ast2400/2500 HOST LPC to BMC mappings through + ioctl()s, the driver also provides a read/write interface to a BMC ram + region where the host LPC read/write region can be buffered. + source "drivers/misc/c2port/Kconfig" source "drivers/misc/eeprom/Kconfig" source "drivers/misc/cb710/Kconfig" diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 31983366090a..de1925a9c80b 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -53,6 +53,7 @@ obj-$(CONFIG_ECHO)+= echo/ obj-$(CONFIG_VEXPRESS_SYSCFG) += vexpress-syscfg.o obj-$(CONFIG_CXL_BASE) += cxl/ obj-$(CONFIG_PANEL
[PATCH v2] drivers/mailbox: Add Aspeed mailbox driver
This provides access to the mbox registers on the ast2400 and ast2500 SoCs. This driver allows arbitrary reads and writes to the 16 data registers as the other end may have configured the mbox hardware to provide an interrupt when a specific register gets written to. Signed-off-by: Cyril Bur --- V2: s/ASpeed/Aspeed/ Reword Kconfig options Use tristate for config symbol drivers/mailbox/Kconfig | 8 + drivers/mailbox/Makefile | 2 + drivers/mailbox/aspeed-mbox.c | 334 ++ 3 files changed, 344 insertions(+) create mode 100644 drivers/mailbox/aspeed-mbox.c diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig index ceff415f201c..e24044d5c219 100644 --- a/drivers/mailbox/Kconfig +++ b/drivers/mailbox/Kconfig @@ -152,4 +152,12 @@ config BCM_PDC_MBOX Mailbox implementation for the Broadcom PDC ring manager, which provides access to various offload engines on Broadcom SoCs. Say Y here if you want to use the Broadcom PDC. + +config ASPEED_LPC_MBOX + depends on (ARCH_ASPEED || COMPILE_TEST) && REGMAP && MFD_SYSCON + tristate "Aspeed LPC Mailbox Controller" + ---help--- + Provides a driver for the MBOX registers found on Aspeed SOCs + (AST2400 and AST2500). This driver provides a device for aspeed + mbox registers endif diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile index 7dde4f609ae8..73165a79b517 100644 --- a/drivers/mailbox/Makefile +++ b/drivers/mailbox/Makefile @@ -31,3 +31,5 @@ obj-$(CONFIG_HI6220_MBOX) += hi6220-mailbox.o obj-$(CONFIG_BCM_PDC_MBOX) += bcm-pdc-mailbox.o obj-$(CONFIG_TEGRA_HSP_MBOX) += tegra-hsp.o + +obj-$(CONFIG_ASPEED_LPC_MBOX) += aspeed-mbox.o diff --git a/drivers/mailbox/aspeed-mbox.c b/drivers/mailbox/aspeed-mbox.c new file mode 100644 index ..0f0c711bafee --- /dev/null +++ b/drivers/mailbox/aspeed-mbox.c @@ -0,0 +1,334 @@ +/* + * Copyright 2017 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEVICE_NAME"aspeed-mbox" + +#define ASPEED_MBOX_NUM_REGS 16 + +#define ASPEED_MBOX_DATA_0 0x00 +#define ASPEED_MBOX_STATUS_0 0x40 +#define ASPEED_MBOX_STATUS_1 0x44 +#define ASPEED_MBOX_BMC_CTRL 0x48 +#define ASPEED_MBOX_CTRL_RECV BIT(7) +#define ASPEED_MBOX_CTRL_MASK BIT(1) +#define ASPEED_MBOX_CTRL_SEND BIT(0) +#define ASPEED_MBOX_HOST_CTRL 0x4c +#define ASPEED_MBOX_INTERRUPT_0 0x50 +#define ASPEED_MBOX_INTERRUPT_1 0x54 + +struct aspeed_mbox { + struct miscdevice miscdev; + struct regmap *regmap; + unsigned intbase; + wait_queue_head_t queue; + struct mutexmutex; +}; + +static atomic_t aspeed_mbox_open_count = ATOMIC_INIT(0); + +static u8 aspeed_mbox_inb(struct aspeed_mbox *mbox, int reg) +{ + /* +* The mbox registers are actually only one byte but are addressed +* four bytes apart. The other three bytes are marked 'reserved', +* they *should* be zero but lets not rely on it. +* I am going to rely on the fact we can casually read/write to them... +*/ + unsigned int val = 0xff; /* If regmap throws an error return 0xff */ + int rc = regmap_read(mbox->regmap, mbox->base + reg, &val); + + if (rc) + dev_err(mbox->miscdev.parent, "regmap_read() failed with " + "%d (reg: 0x%08x)\n", rc, reg); + + return val & 0xff; +} + +static void aspeed_mbox_outb(struct aspeed_mbox *mbox, u8 data, int reg) +{ + int rc = regmap_write(mbox->regmap, mbox->base + reg, data); + + if (rc) + dev_err(mbox->miscdev.parent, "regmap_write() failed with " + "%d (data: %u reg: 0x%08x)\n", rc, data, reg); +} + +static struct aspeed_mbox *file_mbox(struct file *file) +{ + return container_of(file->private_data, struct aspeed_mbox, miscdev); +} + +static int aspeed_mbox_open(struct inode *inode, struct file *file) +{ + struct aspeed_mbox *mbox = file_mbox(file); + + if (atomic_inc_return(&aspeed_mbox_open_count) == 1) { + /* +* Clear the interrupt status bit if it was left on and unmask +* interrupts. +* ASPEED_MBOX_CTRL_RECV bit is W1C, this also unmasks in 1 step +*/ + aspeed_mbox_outb(mbox, ASPEED_MBOX_CTRL_RECV, ASPEED_MBOX_BMC_CTRL); + return 0; + } + + atomic_dec(&am
[PATCH v4] drivers/misc: Add Aspeed LPC control driver
In order to manage server systems, there is typically another processor known as a BMC (Baseboard Management Controller) which is responsible for powering the server and other various elements, sometimes fans, often the system flash. The Aspeed BMC family which is what is used on OpenPOWER machines and a number of x86 as well is typically connected to the host via an LPC (Low Pin Count) bus (among others). The LPC bus is an ISA bus on steroids. It's generally used by the BMC chip to provide the host with access to the system flash (via MEM/FW cycles) that contains the BIOS or other host firmware along with a number of SuperIO-style IOs (via IO space) such as UARTs, IPMI controllers. On the BMC chip side, this is all configured via a bunch of registers whose content is related to a given policy of what devices are exposed at a per system level, which is system/vendor specific, so we don't want to bolt that into the BMC kernel. This started with a need to provide something nicer than /dev/mem for user space to configure these things. One important aspect of the configuration is how the MEM/FW space is exposed to the host (ie, the x86 or POWER). Some registers in that bridge can define a window remapping all or portion of the LPC MEM/FW space to a portion of the BMC internal bus, with no specific limits imposed in HW. I think it makes sense to ensure that this window is configured by a kernel driver that can apply some serious sanity checks on what it is configured to map. In practice, user space wants to control this by flipping the mapping between essentially two types of portions of the BMC address space: - The flash space. This is a region of the BMC MMIO space that more/less directly maps the system flash (at least for reads, writes are somewhat more complicated). - One (or more) reserved area(s) of the BMC physical memory. The latter is needed for a number of things, such as avoiding letting the host manipulate the innards of the BMC flash controller via some evil backdoor, we want to do flash updates by routing the window to a portion of memory (under control of a mailbox protocol via some separate set of registers) which the host can use to write new data in bulk and then request the BMC to flash it. There are other uses, such as allowing the host to boot from an in-memory flash image rather than the one in flash (very handy for continuous integration and test, the BMC can just download new images). It is important to note that due to the way the Aspeed chip lets the kernel configure the mapping between host LPC addresses and BMC ram addresses the offset within the window must be a multiple of size. Not doing so will fragment the accessible space rather than simply moving 'zero' upwards. This is caused by the nature of HICR8 being a mask and the way host LPC addresses are translated. Signed-off-by: Cyril Bur --- v2: Removed unused functions Removed use of access_ok() All input is evil Reworked the interface as per Benjamin Herrenschmidts vision V3: Removed 'default y' from Kconfig Reordered ioctl() struct fields Reworeded some comments V4: Reorder ioctl() struct fields (again) drivers/misc/Kconfig | 8 ++ drivers/misc/Makefile| 1 + drivers/misc/aspeed-lpc-ctrl.c | 263 +++ include/uapi/linux/aspeed-lpc-ctrl.h | 36 + 4 files changed, 308 insertions(+) create mode 100644 drivers/misc/aspeed-lpc-ctrl.c create mode 100644 include/uapi/linux/aspeed-lpc-ctrl.h diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 64971baf11fa..7dc4c369012f 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -766,6 +766,14 @@ config PANEL_BOOT_MESSAGE An empty message will only clear the display at driver init time. Any other printf()-formatted message is valid with newline and escape codes. +config ASPEED_LPC_CTRL + depends on (ARCH_ASPEED || COMPILE_TEST) && REGMAP && MFD_SYSCON + tristate "Aspeed ast2400/2500 HOST LPC to BMC bridge control" + ---help--- + Control Aspeed ast2400/2500 HOST LPC to BMC mappings through + ioctl()s, the driver also provides a read/write interface to a BMC ram + region where the host LPC read/write region can be buffered. + source "drivers/misc/c2port/Kconfig" source "drivers/misc/eeprom/Kconfig" source "drivers/misc/cb710/Kconfig" diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 31983366090a..de1925a9c80b 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -53,6 +53,7 @@ obj-$(CONFIG_ECHO)+= echo/ obj-$(CONFIG_VEXPRESS_SYSCFG) += vexpress-syscfg.o obj-$(CONFIG_CXL_BASE) += cxl/ obj-$(CONFIG_PANEL) += panel.o +obj-$(CONFIG_ASPEED_LPC_CTRL) += aspeed-lpc-ctrl.o lkdtm-$(CONFIG_LKDTM)
Re: [PATCH V10 02/28] powerpc, process: Add the function flush_tmregs_to_thread
On Tue, 16 Feb 2016 14:29:32 +0530 Anshuman Khandual wrote: > This patch creates a function flush_tmregs_to_thread which > will then be used by subsequent patches in this series. The > function checks for self tracing ptrace interface attempts > while in the TM context and logs appropriate warning message. > Hi Anshuman, You'll have to bare with me, my ptrace knowledge is non existent so you might have to walk me though some aspects. I have been playing with FPU/VMX and VSX saving so I thought I'd take a look. > Signed-off-by: Anshuman Khandual > --- > arch/powerpc/include/asm/switch_to.h | 8 > arch/powerpc/kernel/process.c| 20 > 2 files changed, 28 insertions(+) > > diff --git a/arch/powerpc/include/asm/switch_to.h > b/arch/powerpc/include/asm/switch_to.h > index 5b268b6..7b297bf 100644 > --- a/arch/powerpc/include/asm/switch_to.h > +++ b/arch/powerpc/include/asm/switch_to.h > @@ -70,6 +70,14 @@ static inline void disable_kernel_spe(void) > } > #endif > > +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM > +extern void flush_tmregs_to_thread(struct task_struct *); > +#else > +static inline void flush_tmregs_to_thread(struct task_struct *t) > +{ > +} > +#endif > + > static inline void clear_task_ebb(struct task_struct *t) > { > #ifdef CONFIG_PPC_BOOK3S_64 > diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c > index dccc87e..2c4fa7f 100644 > --- a/arch/powerpc/kernel/process.c > +++ b/arch/powerpc/kernel/process.c > @@ -918,6 +918,26 @@ static inline void restore_sprs(struct thread_struct > *old_thread, > #endif > } > > +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM > +void flush_tmregs_to_thread(struct task_struct *tsk) > +{ > + /* > + * Process self tracing is not yet supported through > + * ptrace interface. Ptrace generic code should have > + * prevented this from happening in the first place. > + * Warn once here with the message, if some how it > + * is attempted. > + */ > + WARN_ONCE(tsk == current, > + "Not expecting ptrace on self: TM regs may be incorrect\n"); > + > + /* > + * If task is not current, it should have been flushed > + * already to it's thread_struct during __switch_to(). > + */ I totally agree except this highlights something that I notice in subsequent patches, and existing code. All the *_{get,set}() functions call flush_*_to_thread() when, as per your comment (and my understanding of task switching) there really shouldn't be a need to do that. My only thought is that this could be a relic of uniprocessor days when it would have been necessary but Anton recently stripped that out. Are you able to shed some light here? The reason I ask is that if the flush_*_to_thread() calls ARE actually important then I worry that this function is inadequate... > +} > +#endif > + > struct task_struct *__switch_to(struct task_struct *prev, > struct task_struct *new) > {
Re: [PATCH V10 17/28] selftests, powerpc: Add ptrace tests for EBB
On Tue, 16 Feb 2016 14:29:47 +0530 Anshuman Khandual wrote: > This patch adds ptrace interface test for EBB specific > registers. This also adds some generic ptrace interface > based helper functions to be used by other patches later > on in the series. > > Signed-off-by: Anshuman Khandual > --- > tools/testing/selftests/powerpc/Makefile | 3 +- > tools/testing/selftests/powerpc/ptrace/Makefile| 7 + > .../testing/selftests/powerpc/ptrace/ptrace-ebb.c | 150 ++ > .../testing/selftests/powerpc/ptrace/ptrace-ebb.h | 103 ++ > tools/testing/selftests/powerpc/ptrace/ptrace.h| 225 > + > 5 files changed, 487 insertions(+), 1 deletion(-) > create mode 100644 tools/testing/selftests/powerpc/ptrace/Makefile > create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-ebb.c > create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-ebb.h > create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace.h > > diff --git a/tools/testing/selftests/powerpc/Makefile > b/tools/testing/selftests/powerpc/Makefile > index 0c2706b..5b3c62c 100644 > --- a/tools/testing/selftests/powerpc/Makefile > +++ b/tools/testing/selftests/powerpc/Makefile > @@ -22,7 +22,8 @@ SUB_DIRS = benchmarks \ > switch_endian\ > syscalls \ > tm \ > -vphn > +vphn \ > +ptrace > > endif > > diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile > b/tools/testing/selftests/powerpc/ptrace/Makefile > new file mode 100644 > index 000..8666ac0 > --- /dev/null > +++ b/tools/testing/selftests/powerpc/ptrace/Makefile > @@ -0,0 +1,7 @@ > +TEST_PROGS := ptrace-ebb > +all: $(TEST_PROGS) > + > +$(TEST_PROGS): ../harness.c ptrace.S ../utils.c > +ptrace-ebb: ../pmu/event.c ../pmu/lib.c ../pmu/ebb/ebb_handler.S > ../pmu/ebb/busy_loop.S > +clean: > + rm -f $(TEST_PROGS) *.o If you: include ../../lib.mk Then this Makefile just works with run_tests and install targets from the base Makefile > diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-ebb.c > b/tools/testing/selftests/powerpc/ptrace/ptrace-ebb.c > new file mode 100644 > index 000..e1ca608 > --- /dev/null > +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-ebb.c > @@ -0,0 +1,150 @@ > +/* > + * Ptrace interface test for EBB > + * > + * Copyright (C) 2015 Anshuman Khandual, IBM Corporation. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + */ > +#include "../pmu/ebb/ebb.h" > +#include "ptrace.h" > +#include "ptrace-ebb.h" > + Check out tools/testing/selftests/powerpc/utils.h which provides a nice (and super hacky) FAIL_IF() macro. > +void ebb(void) > +{ > + struct event event; > + > + event_init_named(&event, 0x1001e, "cycles"); > + event.attr.config |= (1ull << 63); > +event.attr.exclusive = 1; > +event.attr.pinned = 1; > + event.attr.exclude_kernel = 1; > + event.attr.exclude_hv = 1; > + event.attr.exclude_idle = 1; > + > + if (event_open(&event)) { > + perror("event_open() failed"); > + exit(1); > + } > + > + setup_ebb_handler(standard_ebb_callee); > + mtspr(SPRN_BESCR, 0x8001ull); > + > + mb(); > + > + if (ebb_event_enable(&event)) { > + perror("ebb_event_handler() failed"); > + exit(1); > + } > + > + mtspr(SPRN_PMC1, pmc_sample_period(SAMPLE_PERIOD)); > + while(1) > + core_busy_loop(); > + exit(0); > +} > + > +int validate_ebb(struct ebb_regs *regs) > +{ > + #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ > + struct opd *opd = (struct opd *) ebb_handler; > + #endif > + > + printf("EBBRR: %lx\n", regs->ebbrr); > + printf("EBBHR: %lx\n", regs->ebbhr); > + printf("BESCR: %lx\n", regs->bescr); > + printf("SIAR: %lx\n", regs->siar); > + printf("SDAR: %lx\n", regs->sdar); > + printf("SIER: %lx\n", regs->sier); > + printf("MMCR2: %lx\n", regs->mmcr2); > + printf("MMCR0: %lx\n", regs->mmcr0); > + > + /* Validate EBBHR */ > + #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ > + if (regs->ebbhr != opd->entry) > + return TEST_FAIL; > + #else > + if (regs->ebbhr != (unsigned long) ebb_handler) > + return TEST_FAIL; > + #endif > + > + /* Validate SIER */ > + if (regs->sier != SIER_EXP) > + return TEST_FAIL; > + > + /* Validate MMCR2 */ > + if (regs->mmcr2 != MMCR2_EXP) > + return TEST_FAIL; > + > + /* Validate MMCR0 */ > + if (regs->mmcr0 != MMCR0_EXP) > + return TEST_FAIL; > + > + return TEST_PASS; > +} > + > +int trace_ebb(pid_t child) > +{ > + struct ebb_r
Re: [PATCH V10 18/28] selftests, powerpc: Add ptrace tests for GPR/FPR registers
On Tue, 16 Feb 2016 14:29:48 +0530 Anshuman Khandual wrote: > This patch adds ptrace interface test for GPR/FPR registers. > This adds ptrace interface based helper functions related to > GPR/FPR access and some assembly helper functions related to > GPR/FPR registers. > I wonder if with a bit of thought we could share our assembly helper functions... https://patchwork.ozlabs.org/patch/589703/ Let me know, happy to meet in the middle and augment my series if needed. Thanks, Cyril > Signed-off-by: Anshuman Khandual > --- > tools/testing/selftests/powerpc/ptrace/Makefile| 3 +- > .../testing/selftests/powerpc/ptrace/ptrace-gpr.c | 191 +++ > .../testing/selftests/powerpc/ptrace/ptrace-gpr.h | 73 > tools/testing/selftests/powerpc/ptrace/ptrace.S| 131 + > tools/testing/selftests/powerpc/ptrace/ptrace.h| 208 > + > 5 files changed, 605 insertions(+), 1 deletion(-) > create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c > create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h > create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace.S > > diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile > b/tools/testing/selftests/powerpc/ptrace/Makefile > index 8666ac0..f5f62d0 100644 > --- a/tools/testing/selftests/powerpc/ptrace/Makefile > +++ b/tools/testing/selftests/powerpc/ptrace/Makefile > @@ -1,4 +1,5 @@ > -TEST_PROGS := ptrace-ebb > +TEST_PROGS := ptrace-ebb ptrace-gpr > + > all: $(TEST_PROGS) > > $(TEST_PROGS): ../harness.c ptrace.S ../utils.c > diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c > b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c > new file mode 100644 > index 000..f84f0e4 > --- /dev/null > +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c > @@ -0,0 +1,191 @@ > +/* > + * Ptrace test for GPR/FPR registers > + * > + * Copyright (C) 2015 Anshuman Khandual, IBM Corporation. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + */ > +#include "ptrace.h" > +#include "ptrace-gpr.h" > + > +/* Tracer and Tracee Shared Data */ > +int shm_id; > +volatile int *cptr, *pptr; > + > +extern void store_gpr(unsigned long *addr); > +extern void store_fpr(float *addr); > + > +float a = FPR_1; > +float b = FPR_2; > +float c = FPR_3; > + > +void gpr(void) > +{ > + unsigned long gpr_buf[18]; > + float fpr_buf[32]; > + > + cptr = (int *)shmat(shm_id, NULL, 0); > + > + asm __volatile__( > + "li 14, %[gpr_1];" > + "li 15, %[gpr_1];" > + "li 16, %[gpr_1];" > + "li 17, %[gpr_1];" > + "li 18, %[gpr_1];" > + "li 19, %[gpr_1];" > + "li 20, %[gpr_1];" > + "li 21, %[gpr_1];" > + "li 22, %[gpr_1];" > + "li 23, %[gpr_1];" > + "li 24, %[gpr_1];" > + "li 25, %[gpr_1];" > + "li 26, %[gpr_1];" > + "li 27, %[gpr_1];" > + "li 28, %[gpr_1];" > + "li 29, %[gpr_1];" > + "li 30, %[gpr_1];" > + "li 31, %[gpr_1];" > + > + "lfs 0, 0(%[flt_1]);" > + "lfs 1, 0(%[flt_1]);" > + "lfs 2, 0(%[flt_1]);" > + "lfs 3, 0(%[flt_1]);" > + "lfs 4, 0(%[flt_1]);" > + "lfs 5, 0(%[flt_1]);" > + "lfs 6, 0(%[flt_1]);" > + "lfs 7, 0(%[flt_1]);" > + "lfs 8, 0(%[flt_1]);" > + "lfs 9, 0(%[flt_1]);" > + "lfs 10, 0(%[flt_1]);" > + "lfs 11, 0(%[flt_1]);" > + "lfs 12, 0(%[flt_1]);" > + "lfs 13, 0(%[flt_1]);" > + "lfs 14, 0(%[flt_1]);" > + "lfs 15, 0(%[flt_1]);" > + "lfs 16, 0(%[flt_1]);" > + "lfs 17, 0(%[flt_1]);" > + "lfs 18, 0(%[flt_1]);" > + "lfs 19, 0(%[flt_1]);" > + "lfs 20, 0(%[flt_1]);" > + "lfs 21, 0(%[flt_1]);" > + "lfs 22, 0(%[flt_1]);" > + "lfs 23, 0(%[flt_1]);" > + "lfs 24, 0(%[flt_1]);" > + "lfs 25, 0(%[flt_1]);" > + "lfs 26, 0(%[flt_1]);" > + "lfs 27, 0(%[flt_1]);" > + "lfs 28, 0(%[flt_1]);" > + "lfs 29, 0(%[flt_1]);" > + "lfs 30, 0(%[flt_1]);" > + "lfs 31, 0(%[flt_1]);" > + > + : > + :[gpr_1]"i"(GPR_1), [flt_1] "r" (&a) > + : "memory", "r6", "r7", "r8", "r9", "r10", > + "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", > "r20", > + "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "r29", > "r30", "r31" > + ); > + > + while(!cptr[0]); > + > + store_gpr(gpr_buf); > +
Re: [PATCH V10 02/28] powerpc, process: Add the function flush_tmregs_to_thread
On Wed, 02 Mar 2016 09:59:06 +0530 Anshuman Khandual wrote: > On 03/02/2016 05:45 AM, Cyril Bur wrote: > > On Tue, 16 Feb 2016 14:29:32 +0530 > > Anshuman Khandual wrote: > > > >> This patch creates a function flush_tmregs_to_thread which > >> will then be used by subsequent patches in this series. The > >> function checks for self tracing ptrace interface attempts > >> while in the TM context and logs appropriate warning message. > >> > > > > Hi Anshuman, > > > > You'll have to bare with me, my ptrace knowledge is non existent so you > > might > > have to walk me though some aspects. > > > > I have been playing with FPU/VMX and VSX saving so I thought I'd take a > > look. > > Sure. > > > > > >> Signed-off-by: Anshuman Khandual > >> --- > >> arch/powerpc/include/asm/switch_to.h | 8 > >> arch/powerpc/kernel/process.c| 20 > >> 2 files changed, 28 insertions(+) > >> > >> diff --git a/arch/powerpc/include/asm/switch_to.h > >> b/arch/powerpc/include/asm/switch_to.h > >> index 5b268b6..7b297bf 100644 > >> --- a/arch/powerpc/include/asm/switch_to.h > >> +++ b/arch/powerpc/include/asm/switch_to.h > >> @@ -70,6 +70,14 @@ static inline void disable_kernel_spe(void) > >> } > >> #endif > >> > >> +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM > >> +extern void flush_tmregs_to_thread(struct task_struct *); > >> +#else > >> +static inline void flush_tmregs_to_thread(struct task_struct *t) > >> +{ > >> +} > >> +#endif > >> + > >> static inline void clear_task_ebb(struct task_struct *t) > >> { > >> #ifdef CONFIG_PPC_BOOK3S_64 > >> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c > >> index dccc87e..2c4fa7f 100644 > >> --- a/arch/powerpc/kernel/process.c > >> +++ b/arch/powerpc/kernel/process.c > >> @@ -918,6 +918,26 @@ static inline void restore_sprs(struct thread_struct > >> *old_thread, > >> #endif > >> } > >> > > > > > > > >> +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM > >> +void flush_tmregs_to_thread(struct task_struct *tsk) > >> +{ > >> + /* > >> + * Process self tracing is not yet supported through > >> + * ptrace interface. Ptrace generic code should have > >> + * prevented this from happening in the first place. > >> + * Warn once here with the message, if some how it > >> + * is attempted. > >> + */ > >> + WARN_ONCE(tsk == current, > >> + "Not expecting ptrace on self: TM regs may be incorrect\n"); > >> + > >> + /* > >> + * If task is not current, it should have been flushed > >> + * already to it's thread_struct during __switch_to(). > >> + */ > > > > I totally agree except this highlights something that I notice in subsequent > > patches, and existing code. All the *_{get,set}() functions call > > flush_*_to_thread() when, as per your comment (and my understanding of task > > switching) there really shouldn't be a need to do that. My only thought is > > that > > this could be a relic of uniprocessor days when it would have been > > necessary but > > Anton recently stripped that out. Are you able to shed some light here? > > Its been sometime I had looked into this aspect of the series. I remember > Michael Neuling and myself discussed about this and settled on a single > WARN_ON here as nothing else was required to be done in the function. It > may be possible that all the flush_*_to_thread() functions used else where > are because of uniprocessor concerns. I dont understand completely our > context save/restore paths including the lazy ones. I believed that these > flush_*_to_thread() routines just made sure task struct has the latest > values of the thread context in case of some complicated save/restore > paths might not have done the complete save at that point in time. > Well as you note in the comment though, it should be done since we've gone through __switch_to()... > If you think that all these flush_*_to_thread() functions used through > out POWER ptrace need review to see whether they are required or not > anymore I would suggest we should do it as a separate patch after this > series and I am willing to work with you on that. I THINK your patches are correct and we're just performing needless flush_*_to_thread() calls now in which case its fine and we can review laster, my concern is that I've been wrong before so having flush_tmregs_to_thread() do nothing worries me. I wonder if Mr Neuling or Mr Ellerman have anything to say on the subject... > > > > > The reason I ask is that if the flush_*_to_thread() calls ARE actually > > important then I worry that this function is inadequate... > > I guess we went through that and finally settled on WARN_ON once but dont > remember the exact context now. Will look into all previous discussions > on this and get back. Thanks, Cyril >
Re: [PATCH V5] powerpc/powernv : Add support for OPAL-OCC command/response interface
On Mon, 2017-06-26 at 11:02 +0530, Shilpasri G Bhat wrote: > In P9, OCC (On-Chip-Controller) supports shared memory based > commad-response interface. Within the shared memory there is an OPAL > command buffer and OCC response buffer that can be used to send > inband commands to OCC. This patch adds a platform driver to support > the command/response interface between OCC and the host. > I feel obliged to check that an occ request_id can be zero, I'm sure its fine - just have to ask, zero is so often special. I should also point out I don't know must about how the OCC works or anything so I would be best if there were other eyes on this. Provided zero is an ok request_id: Reviewed-by: Cyril Bur > Signed-off-by: Shilpasri G Bhat > --- > The skiboot patch for the interface is posted here: > https://lists.ozlabs.org/pipermail/skiboot/2017-June/007960.html > > Changes from V4: > - Add token as a parameter to the opal_occ_command() > - Use per-occ counter for command request_id instead of using async > token. > > arch/powerpc/include/asm/opal-api.h| 41 +++- > arch/powerpc/include/asm/opal.h| 3 + > arch/powerpc/platforms/powernv/Makefile| 2 +- > arch/powerpc/platforms/powernv/opal-occ.c | 303 > + > arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + > arch/powerpc/platforms/powernv/opal.c | 8 + > 6 files changed, 356 insertions(+), 2 deletions(-) > create mode 100644 arch/powerpc/platforms/powernv/opal-occ.c > > diff --git a/arch/powerpc/include/asm/opal-api.h > b/arch/powerpc/include/asm/opal-api.h > index cb3e624..011d86c 100644 > --- a/arch/powerpc/include/asm/opal-api.h > +++ b/arch/powerpc/include/asm/opal-api.h > @@ -42,6 +42,10 @@ > #define OPAL_I2C_STOP_ERR-24 > #define OPAL_XIVE_PROVISIONING -31 > #define OPAL_XIVE_FREE_ACTIVE-32 > +#define OPAL_OCC_INVALID_STATE -33 > +#define OPAL_OCC_BUSY-34 > +#define OPAL_OCC_CMD_TIMEOUT -35 > +#define OPAL_OCC_RSP_MISMATCH-36 > > /* API Tokens (in r0) */ > #define OPAL_INVALID_CALL -1 > @@ -190,7 +194,8 @@ > #define OPAL_NPU_INIT_CONTEXT146 > #define OPAL_NPU_DESTROY_CONTEXT 147 > #define OPAL_NPU_MAP_LPAR148 > -#define OPAL_LAST148 > +#define OPAL_OCC_COMMAND 149 > +#define OPAL_LAST149 > > /* Device tree flags */ > > @@ -829,6 +834,40 @@ struct opal_prd_msg_header { > > struct opal_prd_msg; > > +enum occ_cmd { > + OCC_CMD_AMESTER_PASS_THRU = 0, > + OCC_CMD_CLEAR_SENSOR_DATA, > + OCC_CMD_SET_POWER_CAP, > + OCC_CMD_SET_POWER_SHIFTING_RATIO, > + OCC_CMD_SELECT_SENSOR_GROUPS, > + OCC_CMD_LAST > +}; > + > +struct opal_occ_cmd_rsp_msg { > + __be64 cdata; > + __be64 rdata; > + __be16 cdata_size; > + __be16 rdata_size; > + u8 cmd; > + u8 request_id; > + u8 status; > +}; > + > +struct opal_occ_cmd_data { > + __be16 size; > + u8 cmd; > + u8 data[]; > +}; > + > +struct opal_occ_rsp_data { > + __be16 size; > + u8 status; > + u8 data[]; > +}; > + > +#define MAX_OPAL_CMD_DATA_LENGTH4090 > +#define MAX_OCC_RSP_DATA_LENGTH 8698 > + > #define OCC_RESET 0 > #define OCC_LOAD1 > #define OCC_THROTTLE2 > diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h > index 03ed493..84659bd 100644 > --- a/arch/powerpc/include/asm/opal.h > +++ b/arch/powerpc/include/asm/opal.h > @@ -346,6 +346,9 @@ static inline int opal_get_async_rc(struct opal_msg msg) > > void opal_wake_poller(void); > > +int64_t opal_occ_command(int chip_id, struct opal_occ_cmd_rsp_msg *msg, > + int token, bool retry); > + > #endif /* __ASSEMBLY__ */ > > #endif /* _ASM_POWERPC_OPAL_H */ > diff --git a/arch/powerpc/platforms/powernv/Makefile > b/arch/powerpc/platforms/powernv/Makefile > index b5d98cb..f5f0902 100644 > --- a/arch/powerpc/platforms/powernv/Makefile > +++ b/arch/powerpc/platforms/powernv/Makefile > @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o > opal-async.o idle.o > obj-y+= opal-rtc.o opal-nvram.o opal-lpc.o > opal-flash.o > obj-y+= rng.o opal-elog.o opal-dump.o > opal-sysparam.o opal-sensor.o > obj-y+= opal-msglog.o opal-hmi.o opal-power.o > opal-irqchip.o > -obj-y+= opal-kmsg.
Re: [PATCH v2 2/3] misc: aspeed-lpc: Request and enable LPC clock
On Mon, 2018-02-19 at 17:54 +1030, Joel Stanley wrote: > The LPC device needs to ensure it's clock is enabled before it can do > anything. > > In the past the clock was enabled and left running by u-boot, however > Linux now has an upstream clock driver that disables unused clocks. > > Tested-by: Lei YU > Reviewed-by: Andrew Jeffery > Signed-off-by: Joel Stanley Reviewed-by: Cyril Bur > --- > drivers/misc/aspeed-lpc-ctrl.c | 26 +++--- > 1 file changed, 23 insertions(+), 3 deletions(-) > > diff --git a/drivers/misc/aspeed-lpc-ctrl.c b/drivers/misc/aspeed-lpc-ctrl.c > index b5439643f54b..1827b7aa6674 100644 > --- a/drivers/misc/aspeed-lpc-ctrl.c > +++ b/drivers/misc/aspeed-lpc-ctrl.c > @@ -7,6 +7,7 @@ > * 2 of the License, or (at your option) any later version. > */ > > +#include > #include > #include > #include > @@ -26,6 +27,7 @@ > struct aspeed_lpc_ctrl { > struct miscdevice miscdev; > struct regmap *regmap; > + struct clk *clk; > phys_addr_t mem_base; > resource_size_t mem_size; > u32 pnor_size; > @@ -221,16 +223,33 @@ static int aspeed_lpc_ctrl_probe(struct platform_device > *pdev) > return -ENODEV; > } > > + lpc_ctrl->clk = devm_clk_get(dev, NULL); > + if (IS_ERR(lpc_ctrl->clk)) { > + dev_err(dev, "couldn't get clock\n"); > + return PTR_ERR(lpc_ctrl->clk); > + } > + rc = clk_prepare_enable(lpc_ctrl->clk); > + if (rc) { > + dev_err(dev, "couldn't enable clock\n"); > + return rc; > + } > + > lpc_ctrl->miscdev.minor = MISC_DYNAMIC_MINOR; > lpc_ctrl->miscdev.name = DEVICE_NAME; > lpc_ctrl->miscdev.fops = &aspeed_lpc_ctrl_fops; > lpc_ctrl->miscdev.parent = dev; > rc = misc_register(&lpc_ctrl->miscdev); > - if (rc) > + if (rc) { > dev_err(dev, "Unable to register device\n"); > - else > - dev_info(dev, "Loaded at %pr\n", &resm); > + goto err; > + } > + > + dev_info(dev, "Loaded at %pr\n", &resm); > + > + return 0; > > +err: > + clk_disable_unprepare(lpc_ctrl->clk); > return rc; > } > > @@ -239,6 +258,7 @@ static int aspeed_lpc_ctrl_remove(struct platform_device > *pdev) > struct aspeed_lpc_ctrl *lpc_ctrl = dev_get_drvdata(&pdev->dev); > > misc_deregister(&lpc_ctrl->miscdev); > + clk_disable_unprepare(lpc_ctrl->clk); > > return 0; > }
Re: [PATCH v2 3/3] misc: aspeed-lpc-ctrl: Enable FWH and A2H bridge cycles
On Mon, 2018-02-19 at 17:54 +1030, Joel Stanley wrote: > To date this driver has relied on prevous state from out of tree hacks > and vendor u-boot trees in order to have the host be able to access > data over the LPC bus. > > Now we explicitly enable the AHB to LPC bridge and FWH cycles from when > the user first configures the address to map. We chose to do this then > as before that time there is no way for the kernel to know where it is > safe to point the LPC window. > > Tested-by: Lei YU > Reviewed-by: Andrew Jeffery > Signed-off-by: Joel Stanley Reviewed-by: Cyril Bur > --- > drivers/misc/aspeed-lpc-ctrl.c | 18 -- > 1 file changed, 16 insertions(+), 2 deletions(-) > > diff --git a/drivers/misc/aspeed-lpc-ctrl.c b/drivers/misc/aspeed-lpc-ctrl.c > index 1827b7aa6674..a024f8042259 100644 > --- a/drivers/misc/aspeed-lpc-ctrl.c > +++ b/drivers/misc/aspeed-lpc-ctrl.c > @@ -21,6 +21,10 @@ > > #define DEVICE_NAME "aspeed-lpc-ctrl" > > +#define HICR5 0x0 > +#define HICR5_ENL2H BIT(8) > +#define HICR5_ENFWH BIT(10) > + > #define HICR7 0x8 > #define HICR8 0xc > > @@ -155,8 +159,18 @@ static long aspeed_lpc_ctrl_ioctl(struct file *file, > unsigned int cmd, > if (rc) > return rc; > > - return regmap_write(lpc_ctrl->regmap, HICR8, > - (~(map.size - 1)) | ((map.size >> 16) - 1)); > + rc = regmap_write(lpc_ctrl->regmap, HICR8, > + (~(map.size - 1)) | ((map.size >> 16) - 1)); > + if (rc) > + return rc; > + > + /* > + * Enable LPC FHW cycles. This is required for the host to > + * access the regions specified. > + */ > + return regmap_update_bits(lpc_ctrl->regmap, HICR5, > + HICR5_ENFWH | HICR5_ENL2H, > + HICR5_ENFWH | HICR5_ENL2H); > } > > return -EINVAL;
Re: [PATCH v2] drivers/mailbox: Add Aspeed mailbox driver
Hi, I haven't heard anything about this driver. I'm trying to interpret if the silence is because there is something fundamentally wrong with the driver or is it because it doesn't use any of the mailbox infrastructure it is being ignored. Either way I'm happy to address the problems - in the event of there being something wrong, I'm more than happy to take feedback and address problems. If you would prefer to see it moved somewhere into char/misc, this is also something I can do. Moving into char/misc is my preferred option, I had a look at the mailbox infrastructure and it seems a bit heavyweight for what this ultimately very simple driver is doing. If you would be so kind as to let me know which direction to take. Thanks, Cyril On Wed, 2017-02-08 at 10:36 +1100, Cyril Bur wrote: > This provides access to the mbox registers on the ast2400 and ast2500 > SoCs. > > This driver allows arbitrary reads and writes to the 16 data registers as > the other end may have configured the mbox hardware to provide an > interrupt when a specific register gets written to. > > Signed-off-by: Cyril Bur > --- > V2: >s/ASpeed/Aspeed/ >Reword Kconfig options >Use tristate for config symbol > > drivers/mailbox/Kconfig | 8 + > drivers/mailbox/Makefile | 2 + > drivers/mailbox/aspeed-mbox.c | 334 > ++ > 3 files changed, 344 insertions(+) > create mode 100644 drivers/mailbox/aspeed-mbox.c > > diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig > index ceff415f201c..e24044d5c219 100644 > --- a/drivers/mailbox/Kconfig > +++ b/drivers/mailbox/Kconfig > @@ -152,4 +152,12 @@ config BCM_PDC_MBOX > Mailbox implementation for the Broadcom PDC ring manager, > which provides access to various offload engines on Broadcom > SoCs. Say Y here if you want to use the Broadcom PDC. > + > +config ASPEED_LPC_MBOX > + depends on (ARCH_ASPEED || COMPILE_TEST) && REGMAP && MFD_SYSCON > + tristate "Aspeed LPC Mailbox Controller" > + ---help--- > + Provides a driver for the MBOX registers found on Aspeed SOCs > + (AST2400 and AST2500). This driver provides a device for aspeed > + mbox registers > endif > diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile > index 7dde4f609ae8..73165a79b517 100644 > --- a/drivers/mailbox/Makefile > +++ b/drivers/mailbox/Makefile > @@ -31,3 +31,5 @@ obj-$(CONFIG_HI6220_MBOX) += hi6220-mailbox.o > obj-$(CONFIG_BCM_PDC_MBOX) += bcm-pdc-mailbox.o > > obj-$(CONFIG_TEGRA_HSP_MBOX) += tegra-hsp.o > + > +obj-$(CONFIG_ASPEED_LPC_MBOX)+= aspeed-mbox.o > diff --git a/drivers/mailbox/aspeed-mbox.c b/drivers/mailbox/aspeed-mbox.c > new file mode 100644 > index ..0f0c711bafee > --- /dev/null > +++ b/drivers/mailbox/aspeed-mbox.c > @@ -0,0 +1,334 @@ > +/* > + * Copyright 2017 IBM Corporation > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + */ > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > + > +#define DEVICE_NAME "aspeed-mbox" > + > +#define ASPEED_MBOX_NUM_REGS 16 > + > +#define ASPEED_MBOX_DATA_0 0x00 > +#define ASPEED_MBOX_STATUS_0 0x40 > +#define ASPEED_MBOX_STATUS_1 0x44 > +#define ASPEED_MBOX_BMC_CTRL 0x48 > +#define ASPEED_MBOX_CTRL_RECV BIT(7) > +#define ASPEED_MBOX_CTRL_MASK BIT(1) > +#define ASPEED_MBOX_CTRL_SEND BIT(0) > +#define ASPEED_MBOX_HOST_CTRL 0x4c > +#define ASPEED_MBOX_INTERRUPT_0 0x50 > +#define ASPEED_MBOX_INTERRUPT_1 0x54 > + > +struct aspeed_mbox { > + struct miscdevice miscdev; > + struct regmap *regmap; > + unsigned intbase; > + wait_queue_head_t queue; > + struct mutexmutex; > +}; > + > +static atomic_t aspeed_mbox_open_count = ATOMIC_INIT(0); > + > +static u8 aspeed_mbox_inb(struct aspeed_mbox *mbox, int reg) > +{ > + /* > + * The mbox registers are actually only one byte but are addressed > + * four bytes apart. The other three bytes are marked 'reserved', > + * they *should* be zero but lets not rely on it. > + * I am going to rely on the fact we can casually read/write to them... > + */ > + unsigned int val = 0xff; /* If regmap throws an error return 0xff */ > + int rc = regmap_read(mbox->regmap, mbox->base + reg, &am
Re: [PATCH v2] drivers/mailbox: Add Aspeed mailbox driver
On Mon, 2017-09-04 at 20:13 +0530, Jassi Brar wrote: > On Mon, Sep 4, 2017 at 12:47 PM, Cyril Bur wrote: > > Hi, > > > > I haven't heard anything about this driver. I'm trying to interpret if > > the silence is because there is something fundamentally wrong with the > > driver or is it because it doesn't use any of the mailbox > > infrastructure it is being ignored. > > > > Its the latter. Great! Thanks for your response, I'll resend it under char/misc. Kind regards, Cyril
Re: [PATCH v2] drivers/mailbox: Add Aspeed mailbox driver
On Tue, 2017-09-05 at 08:25 +0200, Greg KH wrote: > On Tue, Sep 05, 2017 at 09:37:19AM +1000, Cyril Bur wrote: > > On Mon, 2017-09-04 at 20:13 +0530, Jassi Brar wrote: > > > On Mon, Sep 4, 2017 at 12:47 PM, Cyril Bur wrote: > > > > Hi, > > > > > > > > I haven't heard anything about this driver. I'm trying to interpret if > > > > the silence is because there is something fundamentally wrong with the > > > > driver or is it because it doesn't use any of the mailbox > > > > infrastructure it is being ignored. > > > > > > > > > > Its the latter. > > > > Great! Thanks for your response, I'll resend it under char/misc. > > Wait, no, please use the mailbox infrastructure for this, making a > one-off char/misc driver isn't ok when there is a framework for you to > use. > Thanks very much for your response. Yes you're absolutely correct, I'll send it to drivers/mailbox. I look forward to working more closely with Jassi. I'll be sure to fully and completely exploit all the features of this mailbox framework. Looking forward to continued development, Thanks very much to all involved, Cyril > thanks, > > greg k-h
[PATCH] checkpatch: Silence perl 5.26.0 unescaped left brace warnings
As of perl 5, version 26, subversion 0 (v5.26.0) some new warnings have occurred when running checkpatch. Unescaped left brace in regex is deprecated here (and will be fatal in Perl 5.30), passed through in regex; marked by <-- HERE in m/^(.\s*){ <-- HERE \s*/ at scripts/checkpatch.pl line 3544. Unescaped left brace in regex is deprecated here (and will be fatal in Perl 5.30), passed through in regex; marked by <-- HERE in m/^(.\s*){ <-- HERE \s*/ at scripts/checkpatch.pl line 3885. Unescaped left brace in regex is deprecated here (and will be fatal in Perl 5.30), passed through in regex; marked by <-- HERE in m/^(\+.*(?:do|\))){ <-- HERE / at scripts/checkpatch.pl line 4374. It seems perfectly reasonable to do as the warning suggests and simply escape the left brace in these three locations. Signed-off-by: Cyril Bur --- scripts/checkpatch.pl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 4b9569fa931b..33e50614a5d9 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3541,7 +3541,7 @@ sub process { $fixedline =~ s/\s*=\s*$/ = {/; fix_insert_line($fixlinenr, $fixedline); $fixedline = $line; - $fixedline =~ s/^(.\s*){\s*/$1/; + $fixedline =~ s/^(.\s*)\{\s*/$1/; fix_insert_line($fixlinenr, $fixedline); } } @@ -3882,7 +3882,7 @@ sub process { my $fixedline = rtrim($prevrawline) . " {"; fix_insert_line($fixlinenr, $fixedline); $fixedline = $rawline; - $fixedline =~ s/^(.\s*){\s*/$1\t/; + $fixedline =~ s/^(.\s*)\{\s*/$1\t/; if ($fixedline !~ /^\+\s*$/) { fix_insert_line($fixlinenr, $fixedline); } @@ -4371,7 +4371,7 @@ sub process { if (ERROR("SPACING", "space required before the open brace '{'\n" . $herecurr) && $fix) { - $fixed[$fixlinenr] =~ s/^(\+.*(?:do|\))){/$1 {/; + $fixed[$fixlinenr] =~ s/^(\+.*(?:do|\)))\{/$1 {/; } } -- 2.13.0
Re: [PATCH V2 2/2] powerpc/powernv : Add support for OPAL-OCC command/response interface
On Tue, 2017-06-13 at 23:26 +0530, Shilpasri G Bhat wrote: > In P9, OCC (On-Chip-Controller) supports shared memory based > commad-response interface. Within the shared memory there is an OPAL > command buffer and OCC response buffer that can be used to send > inband commands to OCC. This patch adds a platform driver to support > the command/response interface between OCC and the host. > Hi Shilpasri, I have another question about printing of errors, see below. Otherwise looks better. When you're dropping the locks I wonder why you couldn't just use atomic_set(). Finally, as you say below, if a user does a read() without having done a write(), there isn't really any guarantee as to what they get. They will get the previous response if there had been a write() call, what about if there had never been a write() call? Could this be considered a security problem? Imagine that one program is using the file descriptor and a malicious program keeps trying to get the file descriptor and then the program which has the file descriptor crashes? What information might the malicious program get? More inline, Cyril > Signed-off-by: Shilpasri G Bhat > --- > Changes from V2: > - Remove spinlock and use atomic_t for setting and clearing flags > - Fix endian swapping > - Use pa() and va() before and after opal call for accessing buffer > data > - Replace (u8 *) with __be64 for buffer pointers > - User reads the previous OCC response if the user does a read() > before a write(). Is this wrong? > - Add WARN_ON check for nr_occs > 254 > > arch/powerpc/include/asm/opal-api.h| 41 +++- > arch/powerpc/include/asm/opal.h| 3 + > arch/powerpc/platforms/powernv/Makefile| 2 +- > arch/powerpc/platforms/powernv/opal-occ.c | 314 > + > arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + > arch/powerpc/platforms/powernv/opal.c | 8 + > 6 files changed, 367 insertions(+), 2 deletions(-) > create mode 100644 arch/powerpc/platforms/powernv/opal-occ.c > > diff --git a/arch/powerpc/include/asm/opal-api.h > b/arch/powerpc/include/asm/opal-api.h > index cb3e624..011d86c 100644 > --- a/arch/powerpc/include/asm/opal-api.h > +++ b/arch/powerpc/include/asm/opal-api.h > @@ -42,6 +42,10 @@ > #define OPAL_I2C_STOP_ERR-24 > #define OPAL_XIVE_PROVISIONING -31 > #define OPAL_XIVE_FREE_ACTIVE-32 > +#define OPAL_OCC_INVALID_STATE -33 > +#define OPAL_OCC_BUSY-34 > +#define OPAL_OCC_CMD_TIMEOUT -35 > +#define OPAL_OCC_RSP_MISMATCH-36 > > /* API Tokens (in r0) */ > #define OPAL_INVALID_CALL -1 > @@ -190,7 +194,8 @@ > #define OPAL_NPU_INIT_CONTEXT146 > #define OPAL_NPU_DESTROY_CONTEXT 147 > #define OPAL_NPU_MAP_LPAR148 > -#define OPAL_LAST148 > +#define OPAL_OCC_COMMAND 149 > +#define OPAL_LAST149 > > /* Device tree flags */ > > @@ -829,6 +834,40 @@ struct opal_prd_msg_header { > > struct opal_prd_msg; > > +enum occ_cmd { > + OCC_CMD_AMESTER_PASS_THRU = 0, > + OCC_CMD_CLEAR_SENSOR_DATA, > + OCC_CMD_SET_POWER_CAP, > + OCC_CMD_SET_POWER_SHIFTING_RATIO, > + OCC_CMD_SELECT_SENSOR_GROUPS, > + OCC_CMD_LAST > +}; > + > +struct opal_occ_cmd_rsp_msg { > + __be64 cdata; > + __be64 rdata; > + __be16 cdata_size; > + __be16 rdata_size; > + u8 cmd; > + u8 request_id; > + u8 status; > +}; > + > +struct opal_occ_cmd_data { > + __be16 size; > + u8 cmd; > + u8 data[]; > +}; > + > +struct opal_occ_rsp_data { > + __be16 size; > + u8 status; > + u8 data[]; > +}; > + > +#define MAX_OPAL_CMD_DATA_LENGTH4090 > +#define MAX_OCC_RSP_DATA_LENGTH 8698 > + > #define OCC_RESET 0 > #define OCC_LOAD1 > #define OCC_THROTTLE2 > diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h > index 03ed493..e55ed79 100644 > --- a/arch/powerpc/include/asm/opal.h > +++ b/arch/powerpc/include/asm/opal.h > @@ -346,6 +346,9 @@ static inline int opal_get_async_rc(struct opal_msg msg) > > void opal_wake_poller(void); > > +int64_t opal_occ_command(int chip_id, struct opal_occ_cmd_rsp_msg *msg, > + bool retry); > + > #endif /* __ASSEMBLY__ */ > > #endif /* _ASM_POWERPC_OPAL_H */ > diff --git a/arch/powerpc/platforms/powernv/Makefile > b/arch/powerpc/platforms/powernv/Makefile > index b5d98cb..f5f0902 100644 > --- a/arch/powerpc/platforms/powernv/Makefile > +++ b/arch/powerpc/platforms/powernv/Makefile > @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o > opal-async.o idle.o > obj-y+= opal-rtc.o opal-nvram.o opal-lpc.o > opal-flash.o > obj-y+= rng.o opal-elog.o opal-dump.o > opal-sysparam.o opal-sensor.o > o
Re: [RFC 2/2] powerpc/powernv : Add support for OPAL-OCC command/response interface
On Mon, 2017-06-05 at 11:43 +0530, Shilpasri G Bhat wrote: > In P9, OCC (On-Chip-Controller) supports shared memory based > commad-response interface. Within the shared memory there is an OPAL > command buffer and OCC response buffer that can be used to send > inband commands to OCC. This patch adds a platform driver to support > the command/response interface between OCC and the host. > > Signed-off-by: Shilpasri G Bhat Hi Shilpasri, Is there somewhere interface (for userspace) is defined? My main concern is: who is doing the endian swapping - I assume once the data leaves Linux and goes into skiboot everything will be in big endian. I've looked through your userspace tool in your skiboot patches and I don't see anywhere where it deals with endian. Linux does some dealing here but since userspace passes an arbitrary buffer, Linux can't convert that. Also I notice that you'll block and wait for the response from skiboot in the write() call from userspace. Just as a fairly easy optimisation I would have though it would make more sense to send the async command to skiboot in the write() but actually block on opal_async_wait_response() when userspace does the read() to get the response data. This provides the advantage that skiboot and the occ might be able to process the request while userspace gets around the calling read() as opposed to forcing userspace to wait in the write() call. Having said that, I don't think this path is particularly performance critical, so this doesn't seem to be a big problem. A few more questions below. Cyril > --- > arch/powerpc/include/asm/opal-api.h| 40 ++- > arch/powerpc/include/asm/opal.h| 3 + > arch/powerpc/platforms/powernv/Makefile| 2 +- > arch/powerpc/platforms/powernv/opal-occ.c | 349 > + > arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + > arch/powerpc/platforms/powernv/opal.c | 8 + > 6 files changed, 401 insertions(+), 2 deletions(-) > create mode 100644 arch/powerpc/platforms/powernv/opal-occ.c > > diff --git a/arch/powerpc/include/asm/opal-api.h > b/arch/powerpc/include/asm/opal-api.h > index cb3e624..d75fbb9 100644 > --- a/arch/powerpc/include/asm/opal-api.h > +++ b/arch/powerpc/include/asm/opal-api.h > @@ -42,6 +42,10 @@ > #define OPAL_I2C_STOP_ERR-24 > #define OPAL_XIVE_PROVISIONING -31 > #define OPAL_XIVE_FREE_ACTIVE-32 > +#define OPAL_OCC_INVALID_STATE -33 > +#define OPAL_OCC_BUSY-34 > +#define OPAL_OCC_CMD_TIMEOUT -35 > +#define OPAL_OCC_RSP_MISMATCH-36 > > /* API Tokens (in r0) */ > #define OPAL_INVALID_CALL -1 > @@ -190,7 +194,8 @@ > #define OPAL_NPU_INIT_CONTEXT146 > #define OPAL_NPU_DESTROY_CONTEXT 147 > #define OPAL_NPU_MAP_LPAR148 > -#define OPAL_LAST148 > +#define OPAL_OCC_COMMAND 149 > +#define OPAL_LAST149 > > /* Device tree flags */ > > @@ -829,6 +834,39 @@ struct opal_prd_msg_header { > > struct opal_prd_msg; > > +enum occ_cmd { > + OCC_CMD_AMESTER_PASS_THRU = 0, > + OCC_CMD_CLEAR_SENSOR_DATA, > + OCC_CMD_SET_POWER_CAP, > + OCC_CMD_SET_POWER_SHIFTING_RATIO, > + OCC_CMD_LAST > +}; > + > +struct opal_occ_cmd_rsp_msg { > + u8 *cdata; > + u8 *rdata; > + __be16 cdata_size; > + __be16 rdata_size; > + u8 cmd; > + u8 request_id; > + u8 status; > +}; > + > +struct opal_occ_cmd_data { > + u16 size; > + u8 cmd; > + u8 data[]; > +}; > + > +struct opal_occ_rsp_data { > + u16 size; > + u8 status; > + u8 data[]; > +}; > + > +#define MAX_OPAL_CMD_DATA_LENGTH4090 > +#define MAX_OCC_RSP_DATA_LENGTH 8698 > + > #define OCC_RESET 0 > #define OCC_LOAD1 > #define OCC_THROTTLE2 > diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h > index 03ed493..e55ed79 100644 > --- a/arch/powerpc/include/asm/opal.h > +++ b/arch/powerpc/include/asm/opal.h > @@ -346,6 +346,9 @@ static inline int opal_get_async_rc(struct opal_msg msg) > > void opal_wake_poller(void); > > +int64_t opal_occ_command(int chip_id, struct opal_occ_cmd_rsp_msg *msg, > + bool retry); > + > #endif /* __ASSEMBLY__ */ > > #endif /* _ASM_POWERPC_OPAL_H */ > diff --git a/arch/powerpc/platforms/powernv/Makefile > b/arch/powerpc/platforms/powernv/Makefile > index b5d98cb..f5f0902 100644 > --- a/arch/powerpc/platforms/powernv/Makefile > +++ b/arch/powerpc/platforms/powernv/Makefile > @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o > opal-async.o idle.o > obj-y+= opal-rtc.o opal-nvram.o opal-lpc.o > opal-flash.o > obj-y+= rng.o opal-elog.o opal-dump.o > opal-sysparam.o opal-sensor.o > obj-y+= o
Re: [PATCH V9 1/3] powernv: powercap: Add support for powercap framework
On Mon, 2017-07-31 at 07:54 +0530, Shilpasri G Bhat wrote: > Adds a generic powercap framework to change the system powercap > inband through OPAL-OCC command/response interface. > > Signed-off-by: Shilpasri G Bhat > --- > Changes from V8: > - Use __pa() while passing pointer in opal call > - Use mutex_lock_interruptible() > - Fix error codes returned to user > - Allocate and add sysfs attributes in a single loop > > arch/powerpc/include/asm/opal-api.h| 5 +- > arch/powerpc/include/asm/opal.h| 4 + > arch/powerpc/platforms/powernv/Makefile| 2 +- > arch/powerpc/platforms/powernv/opal-powercap.c | 243 > + > arch/powerpc/platforms/powernv/opal-wrappers.S | 2 + > arch/powerpc/platforms/powernv/opal.c | 4 + > 6 files changed, 258 insertions(+), 2 deletions(-) > create mode 100644 arch/powerpc/platforms/powernv/opal-powercap.c > > diff --git a/arch/powerpc/include/asm/opal-api.h > b/arch/powerpc/include/asm/opal-api.h > index 3130a73..c3e0c4a 100644 > --- a/arch/powerpc/include/asm/opal-api.h > +++ b/arch/powerpc/include/asm/opal-api.h > @@ -42,6 +42,7 @@ > #define OPAL_I2C_STOP_ERR-24 > #define OPAL_XIVE_PROVISIONING -31 > #define OPAL_XIVE_FREE_ACTIVE-32 > +#define OPAL_TIMEOUT -33 > > /* API Tokens (in r0) */ > #define OPAL_INVALID_CALL -1 > @@ -190,7 +191,9 @@ > #define OPAL_NPU_INIT_CONTEXT146 > #define OPAL_NPU_DESTROY_CONTEXT 147 > #define OPAL_NPU_MAP_LPAR148 > -#define OPAL_LAST148 > +#define OPAL_GET_POWERCAP152 > +#define OPAL_SET_POWERCAP153 > +#define OPAL_LAST153 > > /* Device tree flags */ > > diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h > index 588fb1c..ec2087c 100644 > --- a/arch/powerpc/include/asm/opal.h > +++ b/arch/powerpc/include/asm/opal.h > @@ -267,6 +267,8 @@ int64_t opal_xive_set_vp_info(uint64_t vp, > int64_t opal_xive_free_irq(uint32_t girq); > int64_t opal_xive_sync(uint32_t type, uint32_t id); > int64_t opal_xive_dump(uint32_t type, uint32_t id); > +int opal_get_powercap(u32 handle, int token, u32 *pcap); > +int opal_set_powercap(u32 handle, int token, u32 pcap); > > /* Internal functions */ > extern int early_init_dt_scan_opal(unsigned long node, const char *uname, > @@ -345,6 +347,8 @@ static inline int opal_get_async_rc(struct opal_msg msg) > > void opal_wake_poller(void); > > +void opal_powercap_init(void); > + > #endif /* __ASSEMBLY__ */ > > #endif /* _ASM_POWERPC_OPAL_H */ > diff --git a/arch/powerpc/platforms/powernv/Makefile > b/arch/powerpc/platforms/powernv/Makefile > index b5d98cb..e79f806 100644 > --- a/arch/powerpc/platforms/powernv/Makefile > +++ b/arch/powerpc/platforms/powernv/Makefile > @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o > opal-async.o idle.o > obj-y+= opal-rtc.o opal-nvram.o opal-lpc.o > opal-flash.o > obj-y+= rng.o opal-elog.o opal-dump.o > opal-sysparam.o opal-sensor.o > obj-y+= opal-msglog.o opal-hmi.o opal-power.o > opal-irqchip.o > -obj-y+= opal-kmsg.o > +obj-y+= opal-kmsg.o opal-powercap.o > > obj-$(CONFIG_SMP)+= smp.o subcore.o subcore-asm.o > obj-$(CONFIG_PCI)+= pci.o pci-ioda.o npu-dma.o > diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c > b/arch/powerpc/platforms/powernv/opal-powercap.c > new file mode 100644 > index 000..9be5093 > --- /dev/null > +++ b/arch/powerpc/platforms/powernv/opal-powercap.c > @@ -0,0 +1,243 @@ > +/* > + * PowerNV OPAL Powercap interface > + * > + * Copyright 2017 IBM Corp. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + */ > + > +#define pr_fmt(fmt) "opal-powercap: " fmt > + > +#include > +#include > +#include > + > +#include > + > +DEFINE_MUTEX(powercap_mutex); > + > +static struct kobject *powercap_kobj; > + > +struct powercap_attr { > + u32 handle; > + struct kobj_attribute attr; > +}; > + > +static struct pcap { > + struct attribute_group pg; > + struct powercap_attr *pattrs; > +} *pcaps; > + > +static ssize_t powercap_show(struct kobject *kobj, struct kobj_attribute > *attr, > + char *buf) > +{ > + struct powercap_attr *pcap_attr = container_of(attr, > + struct powercap_attr, attr); > + struct opal_msg msg; > + u32 pcap; > + int ret, token; > + > + token = opal_async_get_token_interruptible(); > + if (token < 0) { > + pr_devel("Failed to get
Re: [PATCH V3 2/2] powerpc/powernv : Add support for OPAL-OCC command/response interface
On Mon, 2017-06-19 at 11:44 +0530, Shilpasri G Bhat wrote: > In P9, OCC (On-Chip-Controller) supports shared memory based > commad-response interface. Within the shared memory there is an OPAL > command buffer and OCC response buffer that can be used to send > inband commands to OCC. This patch adds a platform driver to support > the command/response interface between OCC and the host. > Hi Shilpasri, Looks really nice now! I do have a few questions on the new stuff around locking for read()ing the response. I've put one inline in the read() code, feel free to disagree :) My other question is now sort of the opposite problem, what if you have two threads, one does a write() so now your read()ing mutex allows reads and then thread A does write() at the 'same' time as thread B does read(). Is there a risk of corruption? Would it be good to also hold cmd_in_progress in read(). I'm also assuming that two writes is fine and that loosing the previous response is by design - users might not care about the response, or they might want ignore it or (as often happens in userspace) they don't feel the need to check return codes. > Signed-off-by: Shilpasri G Bhat > --- > Changes from V2: > - Use atomic_set while dropping locks > - Add rsp_consumed flag to protect the read() > - Change the logging level of prints > - Add MAX_POSSIBLE_CHIPS > - Move the endian conversion of data to opal_occ_cmd_prepare() > > arch/powerpc/include/asm/opal-api.h| 41 +++- > arch/powerpc/include/asm/opal.h| 3 + > arch/powerpc/platforms/powernv/Makefile| 2 +- > arch/powerpc/platforms/powernv/opal-occ.c | 307 > + > arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + > arch/powerpc/platforms/powernv/opal.c | 8 + > 6 files changed, 360 insertions(+), 2 deletions(-) > create mode 100644 arch/powerpc/platforms/powernv/opal-occ.c > > diff --git a/arch/powerpc/include/asm/opal-api.h > b/arch/powerpc/include/asm/opal-api.h > index cb3e624..011d86c 100644 > --- a/arch/powerpc/include/asm/opal-api.h > +++ b/arch/powerpc/include/asm/opal-api.h > @@ -42,6 +42,10 @@ > #define OPAL_I2C_STOP_ERR-24 > #define OPAL_XIVE_PROVISIONING -31 > #define OPAL_XIVE_FREE_ACTIVE-32 > +#define OPAL_OCC_INVALID_STATE -33 > +#define OPAL_OCC_BUSY-34 > +#define OPAL_OCC_CMD_TIMEOUT -35 > +#define OPAL_OCC_RSP_MISMATCH-36 > > /* API Tokens (in r0) */ > #define OPAL_INVALID_CALL -1 > @@ -190,7 +194,8 @@ > #define OPAL_NPU_INIT_CONTEXT146 > #define OPAL_NPU_DESTROY_CONTEXT 147 > #define OPAL_NPU_MAP_LPAR148 > -#define OPAL_LAST148 > +#define OPAL_OCC_COMMAND 149 > +#define OPAL_LAST149 > > /* Device tree flags */ > > @@ -829,6 +834,40 @@ struct opal_prd_msg_header { > > struct opal_prd_msg; > > +enum occ_cmd { > + OCC_CMD_AMESTER_PASS_THRU = 0, > + OCC_CMD_CLEAR_SENSOR_DATA, > + OCC_CMD_SET_POWER_CAP, > + OCC_CMD_SET_POWER_SHIFTING_RATIO, > + OCC_CMD_SELECT_SENSOR_GROUPS, > + OCC_CMD_LAST > +}; > + > +struct opal_occ_cmd_rsp_msg { > + __be64 cdata; > + __be64 rdata; > + __be16 cdata_size; > + __be16 rdata_size; > + u8 cmd; > + u8 request_id; > + u8 status; > +}; > + > +struct opal_occ_cmd_data { > + __be16 size; > + u8 cmd; > + u8 data[]; > +}; > + > +struct opal_occ_rsp_data { > + __be16 size; > + u8 status; > + u8 data[]; > +}; > + > +#define MAX_OPAL_CMD_DATA_LENGTH4090 > +#define MAX_OCC_RSP_DATA_LENGTH 8698 > + > #define OCC_RESET 0 > #define OCC_LOAD1 > #define OCC_THROTTLE2 > diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h > index 03ed493..e55ed79 100644 > --- a/arch/powerpc/include/asm/opal.h > +++ b/arch/powerpc/include/asm/opal.h > @@ -346,6 +346,9 @@ static inline int opal_get_async_rc(struct opal_msg msg) > > void opal_wake_poller(void); > > +int64_t opal_occ_command(int chip_id, struct opal_occ_cmd_rsp_msg *msg, > + bool retry); > + > #endif /* __ASSEMBLY__ */ > > #endif /* _ASM_POWERPC_OPAL_H */ > diff --git a/arch/powerpc/platforms/powernv/Makefile > b/arch/powerpc/platforms/powernv/Makefile > index b5d98cb..f5f0902 100644 > --- a/arch/powerpc/platforms/powernv/Makefile > +++ b/arch/powerpc/platforms/powernv/Makefile > @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o > opal-async.o idle.o > obj-y+= opal-rtc.o opal-nvram.o opal-lpc.o > opal-flash.o > obj-y+= rng.o opal-elog.o opal-dump.o > opal-sysparam.o opal-sensor.o > obj-y+= opal-msglog.o opal-hmi.o opal-power.o > opal-irqchip.o > -obj-y+= opal-kmsg.o
Re: [PATCH V8 1/3] powernv: powercap: Add support for powercap framework
On Wed, 2017-07-26 at 10:35 +0530, Shilpasri G Bhat wrote: > Adds a generic powercap framework to change the system powercap > inband through OPAL-OCC command/response interface. > > Signed-off-by: Shilpasri G Bhat > --- > Changes from V7: > - Replaced sscanf with kstrtoint > > arch/powerpc/include/asm/opal-api.h| 5 +- > arch/powerpc/include/asm/opal.h| 4 + > arch/powerpc/platforms/powernv/Makefile| 2 +- > arch/powerpc/platforms/powernv/opal-powercap.c | 237 > + > arch/powerpc/platforms/powernv/opal-wrappers.S | 2 + > arch/powerpc/platforms/powernv/opal.c | 4 + > 6 files changed, 252 insertions(+), 2 deletions(-) > create mode 100644 arch/powerpc/platforms/powernv/opal-powercap.c > > diff --git a/arch/powerpc/include/asm/opal-api.h > b/arch/powerpc/include/asm/opal-api.h > index 3130a73..c3e0c4a 100644 > --- a/arch/powerpc/include/asm/opal-api.h > +++ b/arch/powerpc/include/asm/opal-api.h > @@ -42,6 +42,7 @@ > #define OPAL_I2C_STOP_ERR-24 > #define OPAL_XIVE_PROVISIONING -31 > #define OPAL_XIVE_FREE_ACTIVE-32 > +#define OPAL_TIMEOUT -33 > > /* API Tokens (in r0) */ > #define OPAL_INVALID_CALL -1 > @@ -190,7 +191,9 @@ > #define OPAL_NPU_INIT_CONTEXT146 > #define OPAL_NPU_DESTROY_CONTEXT 147 > #define OPAL_NPU_MAP_LPAR148 > -#define OPAL_LAST148 > +#define OPAL_GET_POWERCAP152 > +#define OPAL_SET_POWERCAP153 > +#define OPAL_LAST153 > > /* Device tree flags */ > > diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h > index 588fb1c..ec2087c 100644 > --- a/arch/powerpc/include/asm/opal.h > +++ b/arch/powerpc/include/asm/opal.h > @@ -267,6 +267,8 @@ int64_t opal_xive_set_vp_info(uint64_t vp, > int64_t opal_xive_free_irq(uint32_t girq); > int64_t opal_xive_sync(uint32_t type, uint32_t id); > int64_t opal_xive_dump(uint32_t type, uint32_t id); > +int opal_get_powercap(u32 handle, int token, u32 *pcap); > +int opal_set_powercap(u32 handle, int token, u32 pcap); > > /* Internal functions */ > extern int early_init_dt_scan_opal(unsigned long node, const char *uname, > @@ -345,6 +347,8 @@ static inline int opal_get_async_rc(struct opal_msg msg) > > void opal_wake_poller(void); > > +void opal_powercap_init(void); > + > #endif /* __ASSEMBLY__ */ > > #endif /* _ASM_POWERPC_OPAL_H */ > diff --git a/arch/powerpc/platforms/powernv/Makefile > b/arch/powerpc/platforms/powernv/Makefile > index b5d98cb..e79f806 100644 > --- a/arch/powerpc/platforms/powernv/Makefile > +++ b/arch/powerpc/platforms/powernv/Makefile > @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o > opal-async.o idle.o > obj-y+= opal-rtc.o opal-nvram.o opal-lpc.o > opal-flash.o > obj-y+= rng.o opal-elog.o opal-dump.o > opal-sysparam.o opal-sensor.o > obj-y+= opal-msglog.o opal-hmi.o opal-power.o > opal-irqchip.o > -obj-y+= opal-kmsg.o > +obj-y+= opal-kmsg.o opal-powercap.o > > obj-$(CONFIG_SMP)+= smp.o subcore.o subcore-asm.o > obj-$(CONFIG_PCI)+= pci.o pci-ioda.o npu-dma.o > diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c > b/arch/powerpc/platforms/powernv/opal-powercap.c > new file mode 100644 > index 000..7c57f4b > --- /dev/null > +++ b/arch/powerpc/platforms/powernv/opal-powercap.c > @@ -0,0 +1,237 @@ > +/* > + * PowerNV OPAL Powercap interface > + * > + * Copyright 2017 IBM Corp. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + */ > + > +#define pr_fmt(fmt) "opal-powercap: " fmt > + > +#include > +#include > +#include > + > +#include > + > +DEFINE_MUTEX(powercap_mutex); > + > +static struct kobject *powercap_kobj; > + > +struct powercap_attr { > + u32 handle; > + struct kobj_attribute attr; > +}; > + > +static struct attribute_group *pattr_groups; > +static struct powercap_attr *pcap_attrs; > + > +static ssize_t powercap_show(struct kobject *kobj, struct kobj_attribute > *attr, > + char *buf) > +{ > + struct powercap_attr *pcap_attr = container_of(attr, > + struct powercap_attr, attr); > + struct opal_msg msg; > + u32 pcap; > + int ret, token; > + > + token = opal_async_get_token_interruptible(); > + if (token < 0) { > + pr_devel("Failed to get token\n"); > + return token; > + } > + > + mutex_lock(&powercap_mutex); If this is purely a userspace interface, shouldn't this be mutex_l
Re: [PATCH V8 2/3] powernv: Add support to set power-shifting-ratio
On Wed, 2017-07-26 at 10:35 +0530, Shilpasri G Bhat wrote: > This patch adds support to set power-shifting-ratio for CPU-GPU which > is used by OCC power capping algorithm. > > Signed-off-by: Shilpasri G Bhat Hi Shilpasri, I started looking though this - a lot the comments to patch 1/3 apply here so I'll stop repeating myself :). Thanks, Cyril > --- > Changes from V7: > - Replaced sscanf with kstrtoint > > arch/powerpc/include/asm/opal-api.h| 4 +- > arch/powerpc/include/asm/opal.h| 3 + > arch/powerpc/platforms/powernv/Makefile| 2 +- > arch/powerpc/platforms/powernv/opal-psr.c | 169 > + > arch/powerpc/platforms/powernv/opal-wrappers.S | 2 + > arch/powerpc/platforms/powernv/opal.c | 3 + > 6 files changed, 181 insertions(+), 2 deletions(-) > create mode 100644 arch/powerpc/platforms/powernv/opal-psr.c > > diff --git a/arch/powerpc/include/asm/opal-api.h > b/arch/powerpc/include/asm/opal-api.h > index c3e0c4a..0d37315 100644 > --- a/arch/powerpc/include/asm/opal-api.h > +++ b/arch/powerpc/include/asm/opal-api.h > @@ -193,7 +193,9 @@ > #define OPAL_NPU_MAP_LPAR148 > #define OPAL_GET_POWERCAP152 > #define OPAL_SET_POWERCAP153 > -#define OPAL_LAST153 > +#define OPAL_GET_PSR 154 > +#define OPAL_SET_PSR 155 > +#define OPAL_LAST155 > > /* Device tree flags */ > > diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h > index ec2087c..58b30a4 100644 > --- a/arch/powerpc/include/asm/opal.h > +++ b/arch/powerpc/include/asm/opal.h > @@ -269,6 +269,8 @@ int64_t opal_xive_set_vp_info(uint64_t vp, > int64_t opal_xive_dump(uint32_t type, uint32_t id); > int opal_get_powercap(u32 handle, int token, u32 *pcap); > int opal_set_powercap(u32 handle, int token, u32 pcap); > +int opal_get_power_shifting_ratio(u32 handle, int token, u32 *psr); > +int opal_set_power_shifting_ratio(u32 handle, int token, u32 psr); > > /* Internal functions */ > extern int early_init_dt_scan_opal(unsigned long node, const char *uname, > @@ -348,6 +350,7 @@ static inline int opal_get_async_rc(struct opal_msg msg) > void opal_wake_poller(void); > > void opal_powercap_init(void); > +void opal_psr_init(void); > > #endif /* __ASSEMBLY__ */ > > diff --git a/arch/powerpc/platforms/powernv/Makefile > b/arch/powerpc/platforms/powernv/Makefile > index e79f806..9ed7d33 100644 > --- a/arch/powerpc/platforms/powernv/Makefile > +++ b/arch/powerpc/platforms/powernv/Makefile > @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o > opal-async.o idle.o > obj-y+= opal-rtc.o opal-nvram.o opal-lpc.o > opal-flash.o > obj-y+= rng.o opal-elog.o opal-dump.o > opal-sysparam.o opal-sensor.o > obj-y+= opal-msglog.o opal-hmi.o opal-power.o > opal-irqchip.o > -obj-y+= opal-kmsg.o opal-powercap.o > +obj-y+= opal-kmsg.o opal-powercap.o opal-psr.o > > obj-$(CONFIG_SMP)+= smp.o subcore.o subcore-asm.o > obj-$(CONFIG_PCI)+= pci.o pci-ioda.o npu-dma.o > diff --git a/arch/powerpc/platforms/powernv/opal-psr.c > b/arch/powerpc/platforms/powernv/opal-psr.c > new file mode 100644 > index 000..07e3f78 > --- /dev/null > +++ b/arch/powerpc/platforms/powernv/opal-psr.c > @@ -0,0 +1,169 @@ > +/* > + * PowerNV OPAL Power-Shifting-Ratio interface > + * > + * Copyright 2017 IBM Corp. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + */ > + > +#define pr_fmt(fmt) "opal-psr: " fmt > + > +#include > +#include > +#include > + > +#include > + > +DEFINE_MUTEX(psr_mutex); > + > +static struct kobject *psr_kobj; > + > +struct psr_attr { > + u32 handle; > + struct kobj_attribute attr; > +}; > + > +static struct psr_attr *psr_attrs; > +static struct kobject *psr_kobj; > + > +static ssize_t psr_show(struct kobject *kobj, struct kobj_attribute *attr, > + char *buf) > +{ > + struct psr_attr *psr_attr = container_of(attr, struct psr_attr, attr); > + struct opal_msg msg; > + int psr, ret, token; > + > + token = opal_async_get_token_interruptible(); > + if (token < 0) { > + pr_devel("Failed to get token\n"); > + return token; > + } > + > + mutex_lock(&psr_mutex); > + ret = opal_get_power_shifting_ratio(psr_attr->handle, token, &psr); __pa() > + switch (ret) { > + case OPAL_ASYNC_COMPLETION: > + ret = opal_async_wait_response(token, &msg); > + if (ret) { > + pr_devel("Failed to wait
Re: [PATCH V8 3/3] powernv: Add support to clear sensor groups data
On Wed, 2017-07-26 at 10:35 +0530, Shilpasri G Bhat wrote: > Adds support for clearing different sensor groups. OCC inband sensor > groups like CSM, Profiler, Job Scheduler can be cleared using this > driver. The min/max of all sensors belonging to these sensor groups > will be cleared. > Hi Shilpasri, I think also some comments from v1 also apply here. Other comments inline Thanks, Cyril > Signed-off-by: Shilpasri G Bhat > --- > Changes from V7: > - s/send_occ_command/opal_sensor_groups_clear_history > > arch/powerpc/include/asm/opal-api.h| 3 +- > arch/powerpc/include/asm/opal.h| 2 + > arch/powerpc/include/uapi/asm/opal-occ.h | 23 ++ > arch/powerpc/platforms/powernv/Makefile| 2 +- > arch/powerpc/platforms/powernv/opal-occ.c | 109 > + > arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + > arch/powerpc/platforms/powernv/opal.c | 3 + > 7 files changed, 141 insertions(+), 2 deletions(-) > create mode 100644 arch/powerpc/include/uapi/asm/opal-occ.h > create mode 100644 arch/powerpc/platforms/powernv/opal-occ.c > > diff --git a/arch/powerpc/include/asm/opal-api.h > b/arch/powerpc/include/asm/opal-api.h > index 0d37315..342738a 100644 > --- a/arch/powerpc/include/asm/opal-api.h > +++ b/arch/powerpc/include/asm/opal-api.h > @@ -195,7 +195,8 @@ > #define OPAL_SET_POWERCAP153 > #define OPAL_GET_PSR 154 > #define OPAL_SET_PSR 155 > -#define OPAL_LAST155 > +#define OPAL_SENSOR_GROUPS_CLEAR 156 > +#define OPAL_LAST156 > > /* Device tree flags */ > > diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h > index 58b30a4..92db6af 100644 > --- a/arch/powerpc/include/asm/opal.h > +++ b/arch/powerpc/include/asm/opal.h > @@ -271,6 +271,7 @@ int64_t opal_xive_set_vp_info(uint64_t vp, > int opal_set_powercap(u32 handle, int token, u32 pcap); > int opal_get_power_shifting_ratio(u32 handle, int token, u32 *psr); > int opal_set_power_shifting_ratio(u32 handle, int token, u32 psr); > +int opal_sensor_groups_clear(u32 group_hndl, int token); > > /* Internal functions */ > extern int early_init_dt_scan_opal(unsigned long node, const char *uname, > @@ -351,6 +352,7 @@ static inline int opal_get_async_rc(struct opal_msg msg) > > void opal_powercap_init(void); > void opal_psr_init(void); > +int opal_sensor_groups_clear_history(u32 handle); > > #endif /* __ASSEMBLY__ */ > > diff --git a/arch/powerpc/include/uapi/asm/opal-occ.h > b/arch/powerpc/include/uapi/asm/opal-occ.h > new file mode 100644 > index 000..97c45e2 > --- /dev/null > +++ b/arch/powerpc/include/uapi/asm/opal-occ.h > @@ -0,0 +1,23 @@ > +/* > + * OPAL OCC command interface > + * Supported on POWERNV platform > + * > + * (C) Copyright IBM 2017 > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2, or (at your option) > + * any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + */ > + > +#ifndef _UAPI_ASM_POWERPC_OPAL_OCC_H_ > +#define _UAPI_ASM_POWERPC_OPAL_OCC_H_ > + > +#define OPAL_OCC_IOCTL_CLEAR_SENSOR_GROUPS _IOR('o', 1, u32) > + > +#endif /* _UAPI_ASM_POWERPC_OPAL_OCC_H */ > diff --git a/arch/powerpc/platforms/powernv/Makefile > b/arch/powerpc/platforms/powernv/Makefile > index 9ed7d33..f193b33 100644 > --- a/arch/powerpc/platforms/powernv/Makefile > +++ b/arch/powerpc/platforms/powernv/Makefile > @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o > opal-async.o idle.o > obj-y+= opal-rtc.o opal-nvram.o opal-lpc.o > opal-flash.o > obj-y+= rng.o opal-elog.o opal-dump.o > opal-sysparam.o opal-sensor.o > obj-y+= opal-msglog.o opal-hmi.o opal-power.o > opal-irqchip.o > -obj-y+= opal-kmsg.o opal-powercap.o opal-psr.o > +obj-y+= opal-kmsg.o opal-powercap.o opal-psr.o > opal-occ.o > > obj-$(CONFIG_SMP)+= smp.o subcore.o subcore-asm.o > obj-$(CONFIG_PCI)+= pci.o pci-ioda.o npu-dma.o > diff --git a/arch/powerpc/platforms/powernv/opal-occ.c > b/arch/powerpc/platforms/powernv/opal-occ.c > new file mode 100644 > index 000..d1d4b28 > --- /dev/null > +++ b/arch/powerpc/platforms/powernv/opal-occ.c > @@ -0,0 +1,109 @@ > +/* > + * Copyright IBM Corporation 2017 > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free