Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts

2013-05-05 Thread tiejun.chen

On 05/06/2013 11:10 AM, Tiejun Chen wrote:

For the external interrupt, the decrementer exception and the doorbell
excpetion, we also need to soft-disable interrupts while doing as host
interrupt handlers since the DO_KVM hook is always performed to skip
EXCEPTION_COMMON then miss this original chance with the 'ints' (INTS_DISABLE).


Sorry, miss to send Ben.

Tiejun



Signed-off-by: Tiejun Chen 
---
  arch/powerpc/kvm/bookehv_interrupts.S |9 +
  1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/kvm/bookehv_interrupts.S 
b/arch/powerpc/kvm/bookehv_interrupts.S
index e8ed7d6..2fd62bf 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -33,6 +33,8 @@

  #ifdef CONFIG_64BIT
  #include 
+#include 
+#include 
  #else
  #include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */
  #endif
@@ -469,6 +471,13 @@ _GLOBAL(kvmppc_resume_host)
PPC_LL  r3, HOST_RUN(r1)
mr  r5, r14 /* intno */
mr  r14, r4 /* Save vcpu pointer. */
+#ifdef CONFIG_64BIT
+   /* Should we soft-disable interrupts? */
+   andi.   r6, r5, BOOKE_INTERRUPT_EXTERNAL | BOOKE_INTERRUPT_DECREMENTER 
| BOOKE_INTERRUPT_DOORBELL
+   beq skip_soft_dis
+   SOFT_DISABLE_INTS(r7,r8)
+skip_soft_dis:
+#endif
bl  kvmppc_handle_exit

/* Restore vcpu pointer and the nonvolatiles we used. */



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v1][KVM][PATCH 1/1] kvm:ppc: enable doorbell exception with E500MC

2013-05-06 Thread tiejun.chen

On 05/06/2013 10:58 PM, Alexander Graf wrote:

On 05/06/2013 04:53 AM, Tiejun Chen wrote:

Actually E500MC also support doorbell exception, and CONFIG_PPC_E500MC
can cover BOOK3E/BOOK3E_64 as well.

Signed-off-by: Tiejun Chen
---
  arch/powerpc/kvm/booke.c |2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 1020119..dc1f590 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -795,7 +795,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
  kvmppc_fill_pt_regs(®s);
  timer_interrupt(®s);
  break;
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3E_64)
+#if defined(CONFIG_PPC_E500MC)


I suppose you mean CONFIG_KVM_E500MC here? Why didn't this work for you before?


This works for me.

Here I just mean currently CONFIG_PPC_E500MC is always selected no matter what 
CONFIG_PPC_FSL_BOOK3E or CONFIG_PPC_BOOK3E_64 is enabled. And especially, this 
is already in the arch/powerpc/kvm/booke.c file, so I think one #ifdef 
(CONFIG_PPC_E500MC) is enough and also makes sense.



The ifdef above should cover the same range of CPUs.


Or furthermore, the #ifdef CONFIG_PPC_DOORBELL is reasonable to cover this.

Tiejun

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts

2013-05-06 Thread tiejun.chen

On 05/07/2013 07:50 AM, Scott Wood wrote:

On 05/05/2013 10:13:17 PM, tiejun.chen wrote:

On 05/06/2013 11:10 AM, Tiejun Chen wrote:

For the external interrupt, the decrementer exception and the doorbell
excpetion, we also need to soft-disable interrupts while doing as host
interrupt handlers since the DO_KVM hook is always performed to skip
EXCEPTION_COMMON then miss this original chance with the 'ints' (INTS_DISABLE).


http://patchwork.ozlabs.org/patch/241344/
http://patchwork.ozlabs.org/patch/241412/

:-)


I'm observing the same behaviour as well:

WARN_ON_ONCE(!irqs_disabled());




Signed-off-by: Tiejun Chen 
---
  arch/powerpc/kvm/bookehv_interrupts.S |9 +
  1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/kvm/bookehv_interrupts.S
b/arch/powerpc/kvm/bookehv_interrupts.S
index e8ed7d6..2fd62bf 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -33,6 +33,8 @@

  #ifdef CONFIG_64BIT
  #include 
+#include 
+#include 
  #else
  #include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */
  #endif
@@ -469,6 +471,13 @@ _GLOBAL(kvmppc_resume_host)
  PPC_LLr3, HOST_RUN(r1)
  mrr5, r14 /* intno */
  mrr14, r4 /* Save vcpu pointer. */
+#ifdef CONFIG_64BIT
+/* Should we soft-disable interrupts? */
+andi.r6, r5, BOOKE_INTERRUPT_EXTERNAL | BOOKE_INTERRUPT_DECREMENTER
| BOOKE_INTERRUPT_DOORBELL
+beqskip_soft_dis
+SOFT_DISABLE_INTS(r7,r8)
+skip_soft_dis:
+#endif


Why wouldn't we always disable them?  kvmppc_handle_exit() will enable
interrupts when it's ready.


This only disable soft interrupt for kvmppc_restart_interrupt() that restarts 
interrupts if they were meant for the host:


a. SOFT_DISABLE_INTS() only for BOOKE_INTERRUPT_EXTERNAL | 
BOOKE_INTERRUPT_DECREMENTER | BOOKE_INTERRUPT_DOORBELL


b. bl  kvmppc_handle_exit

c. kvmppc_handle_exit()
{
int r = RESUME_HOST;
int s;

/* update before a new last_exit_type is rewritten */
kvmppc_update_timing_stats(vcpu);

/* restart interrupts if they were meant for the host */
kvmppc_restart_interrupt(vcpu, exit_nr);

local_irq_enable(); ==> Enable again.


And shouldn't we handle kvmppc_restart_interrupt() like the original HOST flow?

#define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr, ack)   \
START_EXCEPTION(label); \
NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\
EXCEPTION_COMMON(trapnum, PACA_EXGEN, *INTS_DISABLE*) \
...

So I think this should be reasonable :)

Tiejun

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts

2013-05-06 Thread tiejun.chen

On 05/07/2013 10:06 AM, Scott Wood wrote:

On 05/06/2013 08:56:25 PM, tiejun.chen wrote:

On 05/07/2013 07:50 AM, Scott Wood wrote:

On 05/05/2013 10:13:17 PM, tiejun.chen wrote:

On 05/06/2013 11:10 AM, Tiejun Chen wrote:

For the external interrupt, the decrementer exception and the doorbell
excpetion, we also need to soft-disable interrupts while doing as host
interrupt handlers since the DO_KVM hook is always performed to skip
EXCEPTION_COMMON then miss this original chance with the 'ints'
(INTS_DISABLE).


http://patchwork.ozlabs.org/patch/241344/
http://patchwork.ozlabs.org/patch/241412/

:-)


I'm observing the same behaviour as well:

WARN_ON_ONCE(!irqs_disabled());


So, could you explain the benefits of your approach over what's being discussed
in those threads?


They're a long thread so I think I need to take time to see :)




Why wouldn't we always disable them?  kvmppc_handle_exit() will enable
interrupts when it's ready.


This only disable soft interrupt for kvmppc_restart_interrupt() that restarts
interrupts if they were meant for the host:

a. SOFT_DISABLE_INTS() only for BOOKE_INTERRUPT_EXTERNAL |
BOOKE_INTERRUPT_DECREMENTER | BOOKE_INTERRUPT_DOORBELL


Those aren't the only exceptions that can end up going to the host.  We could
get a TLB miss that results in a heavyweight MMIO exit, etc.


This is like host handler, so I'm just disabling soft interrupt during 
kvmppc_restart_interrupt() for Doorbell interrupt/Decrementer Interrupt/External 
Input Interrupt.


I don't see anything should be disabled for any TLB exception in host handler.



And I'd rather see any fix for this problem stay out of the asm code.


We already have an appropriate SOFT_DISABLE_INTS so I think we can take this 
easily :)





b. bl  kvmppc_handle_exit

c. kvmppc_handle_exit()
{
int r = RESUME_HOST;
int s;

/* update before a new last_exit_type is rewritten */
kvmppc_update_timing_stats(vcpu);

/* restart interrupts if they were meant for the host */
kvmppc_restart_interrupt(vcpu, exit_nr);

local_irq_enable();==> Enable again.


And shouldn't we handle kvmppc_restart_interrupt() like the original HOST flow?

#define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr, ack)   \
START_EXCEPTION(label); \
NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\
EXCEPTION_COMMON(trapnum, PACA_EXGEN, *INTS_DISABLE*) \
...


Could you elaborate on what you mean?


In host handler, we always use MASKABLE_EXCEPTION() to define-to-handle some 
exceptions: Doorbell interrupt/Decrementer Interrupt/External Input Interrupt:


#define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr, ack)   \
START_EXCEPTION(label); \
NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\
EXCEPTION_COMMON(trapnum, PACA_EXGEN, *INTS_DISABLE*) \

This would call INTS_DISABLE, which is equal to SOFT_DISABLE_INTS(), to disable 
soft interrupt before call all associated handlers: 
do_IRQ()/timer_interrupt()/doorbell_exception().


But DO_KVM hook always skips INTS_DISABLE.

So I think we also need to do INTS_DISABLE for kvmppc_restart_interrupt() since 
actually that restarts interrupts for the host with a similar way as they are 
called by host.


Tiejun



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc: Make hard_irq_disable() do the right thing vs. irq tracing

2013-05-07 Thread tiejun.chen

On 05/07/2013 03:04 PM, Benjamin Herrenschmidt wrote:

If hard_irq_disable() is called while interrupts are already soft-disabled
(which is the most common case) all is already well.

However you can (and in some cases want) to call it while everything is
enabled (to make sure you don't get a lazy even, for example before entry
into KVM guests) and in this case we need to inform the irq tracer that
the irqs are going off.

We have to change the inline into a macro to avoid an include circular
dependency hell hole.

Signed-off-by: Benjamin Herrenschmidt 
---

Tested on pseries, Scott, I don't expect a problem with that patch especially
since most callers already are soft disabled, so I'll merge it now along with
my other pending stuff and you can simplify your KVM one.

diff --git a/arch/powerpc/include/asm/hw_irq.h 
b/arch/powerpc/include/asm/hw_irq.h
index e45c494..d615b28 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -95,15 +95,13 @@ static inline bool arch_irqs_disabled(void)
  #define __hard_irq_disable()  __mtmsrd(local_paca->kernel_msr, 1)
  #endif

-static inline void hard_irq_disable(void)
-{
-   __hard_irq_disable();
-   get_paca()->soft_enabled = 0;
-   get_paca()->irq_happened |= PACA_IRQ_HARD_DIS;
-}
-
-/* include/linux/interrupt.h needs hard_irq_disable to be a macro */
-#define hard_irq_disable   hard_irq_disable
+#define hard_irq_disable() do {\
+   __hard_irq_disable();   \
+   if (local_paca->soft_enabled)\
+   trace_hardirqs_off();   \
+   get_paca()->soft_enabled = 0;\


Could we simplify this as follows:

+#define hard_irq_disable() do {\
+   __hard_irq_disable();   \
+   if (get_paca()->soft_enabled) {  \
+   trace_hardirqs_off();   \
+   get_paca()->soft_enabled = 0;\
+   }   \
+   get_paca()->irq_happened |= PACA_IRQ_HARD_DIS;   \
+} while(0)

Tiejun


+   get_paca()->irq_happened |= PACA_IRQ_HARD_DIS;   \
+} while(0)

  static inline bool lazy_irq_pending(void)
  {




___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v1][KVM][PATCH 1/1] kvm:ppc: enable doorbell exception with E500MC

2013-05-07 Thread tiejun.chen

On 05/07/2013 09:33 AM, tiejun.chen wrote:

On 05/06/2013 10:58 PM, Alexander Graf wrote:

On 05/06/2013 04:53 AM, Tiejun Chen wrote:

Actually E500MC also support doorbell exception, and CONFIG_PPC_E500MC
can cover BOOK3E/BOOK3E_64 as well.

Signed-off-by: Tiejun Chen
---
  arch/powerpc/kvm/booke.c |2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 1020119..dc1f590 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -795,7 +795,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
  kvmppc_fill_pt_regs(®s);
  timer_interrupt(®s);
  break;
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3E_64)
+#if defined(CONFIG_PPC_E500MC)


I suppose you mean CONFIG_KVM_E500MC here? Why didn't this work for you before?


This works for me.

Here I just mean currently CONFIG_PPC_E500MC is always selected no matter what
CONFIG_PPC_FSL_BOOK3E or CONFIG_PPC_BOOK3E_64 is enabled. And especially, this
is already in the arch/powerpc/kvm/booke.c file, so I think one #ifdef
(CONFIG_PPC_E500MC) is enough and also makes sense.


The ifdef above should cover the same range of CPUs.


Or furthermore, the #ifdef CONFIG_PPC_DOORBELL is reasonable to cover this.



I think this may be better so I send next version with this. So please take a 
look at.


Tiejun

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v1][KVM][PATCH 1/1] kvm:ppc:booehv: direct ISI exception to Guest

2013-05-07 Thread tiejun.chen

On 05/08/2013 07:40 AM, Scott Wood wrote:

On 05/07/2013 06:06:30 AM, Tiejun Chen wrote:

We also can direct ISI exception to Guest like DSI.

Signed-off-by: Tiejun Chen 
---
 arch/powerpc/kvm/booke_emulate.c |3 +++
 arch/powerpc/kvm/e500mc.c|3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)


Are you seeing a real performance improvement from this?  This will interfere


No. But after we reduce the exit to host, shouldn't this improve performance?


somewhat with using the VF bit, if we were to ever do so, since VF only affects


Sorry, what is the VF you said?

Tiejun

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v1][KVM][PATCH 1/1] kvm:ppc:booehv: direct ISI exception to Guest

2013-05-08 Thread tiejun.chen

On 05/08/2013 05:20 PM, Caraman Mihai Claudiu-B02008 wrote:

-Original Message-
From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On
Behalf Of tiejun.chen
Sent: Wednesday, May 08, 2013 4:54 AM
To: Wood Scott-B07421
Cc: ag...@suse.de; kvm-...@vger.kernel.org; k...@vger.kernel.org;
linuxppc-dev@lists.ozlabs.org
Subject: Re: [v1][KVM][PATCH 1/1] kvm:ppc:booehv: direct ISI exception to
Guest

On 05/08/2013 07:40 AM, Scott Wood wrote:

On 05/07/2013 06:06:30 AM, Tiejun Chen wrote:

We also can direct ISI exception to Guest like DSI.

Signed-off-by: Tiejun Chen 
---
  arch/powerpc/kvm/booke_emulate.c |3 +++
  arch/powerpc/kvm/e500mc.c|3 ++-
  2 files changed, 5 insertions(+), 1 deletion(-)


Are you seeing a real performance improvement from this?  This will

interfere

No. But after we reduce the exit to host, shouldn't this improve
performance?


We lose some flexibility for this so it make sense only if we gain
measurable improvements.


Sounds we have much more works to do.






somewhat with using the VF bit, if we were to ever do so, since VF only

affects

Sorry, what is the VF you said?


VF stands for virtualization fault see MAS8[VF] and we may use it for 
virtualized


I almost forget this point :)


MMIO. The hypervisor should deny execute access on pages marked with VF. 
Accordingly
in this case guest ISI exceptions should be handled by the hypervisor.


Thanks for your information.

Tiejun

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts

2013-05-09 Thread tiejun.chen

On 05/09/2013 03:33 PM, Bhushan Bharat-R65777 wrote:




-Original Message-
From: Linuxppc-dev [mailto:linuxppc-dev-
bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of Caraman
Mihai Claudiu-B02008
Sent: Wednesday, May 08, 2013 6:44 PM
To: Wood Scott-B07421; tiejun.chen
Cc: linuxppc-dev@lists.ozlabs.org; ag...@suse.de; kvm-...@vger.kernel.org;
k...@vger.kernel.org
Subject: RE: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts


This only disable soft interrupt for kvmppc_restart_interrupt() that
restarts interrupts if they were meant for the host:

a. SOFT_DISABLE_INTS() only for BOOKE_INTERRUPT_EXTERNAL |
BOOKE_INTERRUPT_DECREMENTER | BOOKE_INTERRUPT_DOORBELL


Those aren't the only exceptions that can end up going to the host.
We could get a TLB miss that results in a heavyweight MMIO exit, etc.


And shouldn't we handle kvmppc_restart_interrupt() like the original
HOST flow?

#define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr,
ack)   \

START_EXCEPTION(label); \
 NORMAL_EXCEPTION_PROLOG(trapnum, intnum,
PROLOG_ADDITION_MASKABLE)\
 EXCEPTION_COMMON(trapnum, PACA_EXGEN,
*INTS_DISABLE*) \
...


Could you elaborate on what you mean?


I think Tiejun was saying that host has flags and replays only EE/DEC/DBELL
interrupts. There is special macro masked_interrupt_book3e in those exception
handlers that sets paca->irq_happened.

The list of replied interrupts is limited to asynchronous noncritical interrupts
which can be masked by MSR[EE] (therefore no TLB miss).


Embedded Perfmon interrupt is also asynchronous, Why that is not in the list of 
masked interruts.


Are you saying perfmon? If so, its also in that list:

START_EXCEPTION(perfmon);
NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x260, PACA_EXGEN, INTS_DISABLE)

Tiejun



-Bharat


Now on KVM book3e we
don't want to put them in the irq_happened lazy state but rather to execute them
directly, so there is no reason for exception handling symmetry between host and
guest.

-Mike


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts

2013-05-09 Thread tiejun.chen

On 05/09/2013 03:51 PM, Bhushan Bharat-R65777 wrote:




-Original Message-
From: tiejun.chen [mailto:tiejun.c...@windriver.com]
Sent: Thursday, May 09, 2013 1:18 PM
To: Bhushan Bharat-R65777
Cc: Caraman Mihai Claudiu-B02008; Wood Scott-B07421; linuxppc-
d...@lists.ozlabs.org; ag...@suse.de; kvm-...@vger.kernel.org;
k...@vger.kernel.org
Subject: Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts

On 05/09/2013 03:33 PM, Bhushan Bharat-R65777 wrote:




-Original Message-
From: Linuxppc-dev [mailto:linuxppc-dev-
bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of
bounces+Caraman
Mihai Claudiu-B02008
Sent: Wednesday, May 08, 2013 6:44 PM
To: Wood Scott-B07421; tiejun.chen
Cc: linuxppc-dev@lists.ozlabs.org; ag...@suse.de;
kvm-...@vger.kernel.org; k...@vger.kernel.org
Subject: RE: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable
interrupts


This only disable soft interrupt for kvmppc_restart_interrupt()
that restarts interrupts if they were meant for the host:

a. SOFT_DISABLE_INTS() only for BOOKE_INTERRUPT_EXTERNAL |
BOOKE_INTERRUPT_DECREMENTER | BOOKE_INTERRUPT_DOORBELL


Those aren't the only exceptions that can end up going to the host.
We could get a TLB miss that results in a heavyweight MMIO exit, etc.


And shouldn't we handle kvmppc_restart_interrupt() like the
original HOST flow?

#define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr,
ack)   \

START_EXCEPTION(label); \
  NORMAL_EXCEPTION_PROLOG(trapnum, intnum,
PROLOG_ADDITION_MASKABLE)\
  EXCEPTION_COMMON(trapnum, PACA_EXGEN,
*INTS_DISABLE*) \
...


Could you elaborate on what you mean?


I think Tiejun was saying that host has flags and replays only
EE/DEC/DBELL interrupts. There is special macro
masked_interrupt_book3e in those exception handlers that sets paca-

irq_happened.


The list of replied interrupts is limited to asynchronous noncritical
interrupts which can be masked by MSR[EE] (therefore no TLB miss).


Embedded Perfmon interrupt is also asynchronous, Why that is not in the list

of masked interruts.

Are you saying perfmon? If so, its also in that list:

  START_EXCEPTION(perfmon);
  NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR,
  PROLOG_ADDITION_NONE)
  EXCEPTION_COMMON(0x260, PACA_EXGEN, INTS_DISABLE)


Where it is recorded in paca->irq_happned to be replayed later ?


In stead of PROLOG_ADDITION_MASKABLE, actually we have nothing to do with 
PROLOG_ADDITION_NONE for perfmon, so perfmon can be executed without lazy state.


Tiejun





Tiejun



-Bharat


Now on KVM book3e we
don't want to put them in the irq_happened lazy state but rather to
execute them directly, so there is no reason for exception handling
symmetry between host and guest.

-Mike






___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts

2013-05-09 Thread tiejun.chen

On 05/09/2013 04:12 PM, Bhushan Bharat-R65777 wrote:




-Original Message-
From: Kevin Hao [mailto:haoke...@gmail.com]
Sent: Thursday, May 09, 2013 1:38 PM
To: Bhushan Bharat-R65777
Cc: tiejun.chen; Caraman Mihai Claudiu-B02008; k...@vger.kernel.org; Wood Scott-
B07421; ag...@suse.de; kvm-...@vger.kernel.org; linuxppc-dev@lists.ozlabs.org
Subject: Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts

On Thu, May 09, 2013 at 07:51:09AM +, Bhushan Bharat-R65777 wrote:




-Original Message-
From: tiejun.chen [mailto:tiejun.c...@windriver.com]
Sent: Thursday, May 09, 2013 1:18 PM
To: Bhushan Bharat-R65777
Cc: Caraman Mihai Claudiu-B02008; Wood Scott-B07421; linuxppc-
d...@lists.ozlabs.org; ag...@suse.de; kvm-...@vger.kernel.org;
k...@vger.kernel.org
Subject: Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable
interrupts

On 05/09/2013 03:33 PM, Bhushan Bharat-R65777 wrote:




-Original Message-
From: Linuxppc-dev [mailto:linuxppc-dev-
bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf
bounces+Of Caraman
Mihai Claudiu-B02008
Sent: Wednesday, May 08, 2013 6:44 PM
To: Wood Scott-B07421; tiejun.chen
Cc: linuxppc-dev@lists.ozlabs.org; ag...@suse.de;
kvm-...@vger.kernel.org; k...@vger.kernel.org
Subject: RE: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable
interrupts


This only disable soft interrupt for kvmppc_restart_interrupt()
that restarts interrupts if they were meant for the host:

a. SOFT_DISABLE_INTS() only for BOOKE_INTERRUPT_EXTERNAL |
BOOKE_INTERRUPT_DECREMENTER | BOOKE_INTERRUPT_DOORBELL


Those aren't the only exceptions that can end up going to the host.
We could get a TLB miss that results in a heavyweight MMIO exit, etc.


And shouldn't we handle kvmppc_restart_interrupt() like the
original HOST flow?

#define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr,
ack)   \

START_EXCEPTION(label); \
  NORMAL_EXCEPTION_PROLOG(trapnum, intnum,
PROLOG_ADDITION_MASKABLE)\
  EXCEPTION_COMMON(trapnum, PACA_EXGEN,
*INTS_DISABLE*) \
...


Could you elaborate on what you mean?


I think Tiejun was saying that host has flags and replays only
EE/DEC/DBELL interrupts. There is special macro
masked_interrupt_book3e in those exception handlers that sets
paca-

irq_happened.


The list of replied interrupts is limited to asynchronous
noncritical interrupts which can be masked by MSR[EE] (therefore no TLB

miss).


Embedded Perfmon interrupt is also asynchronous, Why that is not
in the list

of masked interruts.

Are you saying perfmon? If so, its also in that list:

  START_EXCEPTION(perfmon);
  NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR,
  PROLOG_ADDITION_NONE)
  EXCEPTION_COMMON(0x260, PACA_EXGEN, INTS_DISABLE)


Where it is recorded in paca->irq_happned to be replayed later ?


Actually we don't want replay the perfmon interrupt later. We would run it even
soft irq is disabled and just treat it as NMI. Please see the following function
quoted from arch/powerpc/perf/core-fsl-emb.c:
   /*
* If interrupts were soft-disabled when a PMU interrupt occurs, treat
* it as an NMI.
*/
   static inline int perf_intr_is_nmi(struct pt_regs *regs)
   {
   #ifdef __powerpc64__
   return !regs->softe;
   #else
   return 0;
   #endif
   }


Is it because that we cannot afford to lose perfmon interrupt for more accurate 
capturing of data ?


powerpc/perf: e500 support

This implements perf_event support for the Freescale embedded performance
monitor, based on the existing perf_event.c that supports server/classic
chips.

Some limitations:
- Performance monitor interrupts are regular EE interrupts, and thus you
  can't profile places with interrupts disabled.  We may want to implement
  soft IRQ-disabling, with perfmon interrupts exempted and treated as NMIs.

Tiejun



-Bharat



Thanks,
Kevin





Tiejun



-Bharat


Now on KVM book3e we
don't want to put them in the irq_happened lazy state but rather
to execute them directly, so there is no reason for exception
handling symmetry between host and guest.

-Mike




___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev






___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts

2013-05-09 Thread tiejun.chen

On 05/09/2013 04:23 PM, Bhushan Bharat-R65777 wrote:




-Original Message-
From: Linuxppc-dev [mailto:linuxppc-dev-
bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of Caraman
Mihai Claudiu-B02008
Sent: Wednesday, May 08, 2013 6:44 PM
To: Wood Scott-B07421; tiejun.chen
Cc: linuxppc-dev@lists.ozlabs.org; ag...@suse.de; kvm-...@vger.kernel.org;
k...@vger.kernel.org
Subject: RE: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts


This only disable soft interrupt for kvmppc_restart_interrupt() that
restarts interrupts if they were meant for the host:

a. SOFT_DISABLE_INTS() only for BOOKE_INTERRUPT_EXTERNAL |
BOOKE_INTERRUPT_DECREMENTER | BOOKE_INTERRUPT_DOORBELL


Those aren't the only exceptions that can end up going to the host.
We could get a TLB miss that results in a heavyweight MMIO exit, etc.


And shouldn't we handle kvmppc_restart_interrupt() like the original
HOST flow?

#define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr,
ack)   \

START_EXCEPTION(label); \
 NORMAL_EXCEPTION_PROLOG(trapnum, intnum,
PROLOG_ADDITION_MASKABLE)\
 EXCEPTION_COMMON(trapnum, PACA_EXGEN,
*INTS_DISABLE*) \
...


Could you elaborate on what you mean?


I think Tiejun was saying that host has flags and replays only EE/DEC/DBELL
interrupts. There is special macro masked_interrupt_book3e in those exception
handlers that sets paca->irq_happened.

The list of replied interrupts is limited to asynchronous noncritical interrupts
which can be masked by MSR[EE] (therefore no TLB miss). Now on KVM book3e we
don't want to put them in the irq_happened lazy state but rather to execute them
directly, so there is no reason for exception handling symmetry between host and
guest.



Another Question:

The case is:



Actually in the case GS=1 even if EE=0, EXT/DEC/DBELL still occur as I recall.


Case 1)
  -> Local_irq_disable()  will set soft_enabled = 0
  -> Now Externel interrupt happens, there we set PACA_IRQ_EE in irq_happened, 
Also clears EE in SRR1 and rfi. So interrupts are hard disabled. No more other 
interrupt gated by MSR.EE can happen. Looks like the idea here is to not let a 
device keep on inserting interrupt till the interrupt condition on device is 
cleared, right?


I don't understand "the interrupt condition on device is cleared" here.

I think regardless if you clear the device interrupt status, the system still 
receive a pending interrupt once EE or GS = 1.



  -> local_irq_enable() - This checks that irq_happened is set, and replays


ret_from_except also check to replay.



Now the case 2)
Case 2)
-> Local_irq_disable()  will set soft_enabled = 0
  -> Now DEC interrupt happens. We set PACA_IRQ_DEC in irq_happened, But do not 
clear EE in SRR1 and rfi. So interrupts are not hard disabled.
  -> Now say EE interrupt happens, there we set PACA_IRQ_EE in irq_happened, 
Also clears EE in SRR1 and rfi. So interrupts are hard disabled.
  -> local_irq_enable() - This checks that irq_happened is set.
IIUC, it replays only one interrupt? is not it?


After anyone is replayed in arch_local_irq_restore(), we will set soft/hard 
interrupt there:


set_soft_enabled(1);
__hard_irq_enable();

Then any pending interrupt can be executed now.

Additionally, ret_from_except probably check to replay all.

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts

2013-05-09 Thread tiejun.chen

On 05/09/2013 06:00 PM, Bhushan Bharat-R65777 wrote:




-Original Message-
From: tiejun.chen [mailto:tiejun.c...@windriver.com]
Sent: Thursday, May 09, 2013 3:15 PM
To: Bhushan Bharat-R65777
Cc: Caraman Mihai Claudiu-B02008; Wood Scott-B07421; linuxppc-
d...@lists.ozlabs.org; ag...@suse.de; kvm-...@vger.kernel.org;
k...@vger.kernel.org
Subject: Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts

On 05/09/2013 04:23 PM, Bhushan Bharat-R65777 wrote:




-Original Message-
From: Linuxppc-dev [mailto:linuxppc-dev-
bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of
bounces+Caraman
Mihai Claudiu-B02008
Sent: Wednesday, May 08, 2013 6:44 PM
To: Wood Scott-B07421; tiejun.chen
Cc: linuxppc-dev@lists.ozlabs.org; ag...@suse.de;
kvm-...@vger.kernel.org; k...@vger.kernel.org
Subject: RE: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable
interrupts


This only disable soft interrupt for kvmppc_restart_interrupt()
that restarts interrupts if they were meant for the host:

a. SOFT_DISABLE_INTS() only for BOOKE_INTERRUPT_EXTERNAL |
BOOKE_INTERRUPT_DECREMENTER | BOOKE_INTERRUPT_DOORBELL


Those aren't the only exceptions that can end up going to the host.
We could get a TLB miss that results in a heavyweight MMIO exit, etc.


And shouldn't we handle kvmppc_restart_interrupt() like the
original HOST flow?

#define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr,
ack)   \

START_EXCEPTION(label); \
  NORMAL_EXCEPTION_PROLOG(trapnum, intnum,
PROLOG_ADDITION_MASKABLE)\
  EXCEPTION_COMMON(trapnum, PACA_EXGEN,
*INTS_DISABLE*) \
...


Could you elaborate on what you mean?


I think Tiejun was saying that host has flags and replays only
EE/DEC/DBELL interrupts. There is special macro
masked_interrupt_book3e in those exception handlers that sets paca-

irq_happened.


The list of replied interrupts is limited to asynchronous noncritical
interrupts which can be masked by MSR[EE] (therefore no TLB miss).
Now on KVM book3e we don't want to put them in the irq_happened lazy
state but rather to execute them directly, so there is no reason for
exception handling symmetry between host and guest.



Another Question:

The case is:



Actually in the case GS=1 even if EE=0, EXT/DEC/DBELL still occur as I recall.


Case 1)
   -> Local_irq_disable()  will set soft_enabled = 0
   -> Now Externel interrupt happens, there we set PACA_IRQ_EE in irq_happened,

Also clears EE in SRR1 and rfi. So interrupts are hard disabled. No more other
interrupt gated by MSR.EE can happen. Looks like the idea here is to not let a
device keep on inserting interrupt till the interrupt condition on device is
cleared, right?

I don't understand "the interrupt condition on device is cleared" here.

I think regardless if you clear the device interrupt status, the system still
receive a pending interrupt once EE or GS = 1.


Once yes, but I think to avoid flood of device interrupt we disable MSR.EE when 
soft-disabled.


But we neither ACK nor send EOI to that irq in the interrupt controller, so that 
should be in pending state.







   -> local_irq_enable() - This checks that irq_happened is set, and
replays


ret_from_except also check to replay.



Now the case 2)
Case 2)
-> Local_irq_disable()  will set soft_enabled = 0
   -> Now DEC interrupt happens. We set PACA_IRQ_DEC in irq_happened, But do

not clear EE in SRR1 and rfi. So interrupts are not hard disabled.

   -> Now say EE interrupt happens, there we set PACA_IRQ_EE in irq_happened,

Also clears EE in SRR1 and rfi. So interrupts are hard disabled.

   -> local_irq_enable() - This checks that irq_happened is set.
IIUC, it replays only one interrupt? is not it?


After anyone is replayed in arch_local_irq_restore(), we will set soft/hard
interrupt there:

set_soft_enabled(1);
__hard_irq_enable();

Then any pending interrupt can be executed now.


Do you mean that the interrupt should fire again?


I means the pending exception including external interrupt, the decrementer 
exception and the doorbell exception, can trap CPU once EE=1 with 
__hard_irq_enable() here. Then the kernel can handle those exception since soft 
enable is also 1 now.






Additionally, ret_from_except probably check to replay all.


Local_irq_enable() will not take us to ret_from_except.


Yes. I just say ret_from_except can provide an approach to replay all :)

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v1][KVM][PATCH 1/1] kvm:ppc:booehv: direct ISI exception to Guest

2013-05-09 Thread tiejun.chen

On 05/08/2013 05:28 PM, tiejun.chen wrote:

On 05/08/2013 05:20 PM, Caraman Mihai Claudiu-B02008 wrote:

-Original Message-
From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On
Behalf Of tiejun.chen
Sent: Wednesday, May 08, 2013 4:54 AM
To: Wood Scott-B07421
Cc: ag...@suse.de; kvm-...@vger.kernel.org; k...@vger.kernel.org;
linuxppc-dev@lists.ozlabs.org
Subject: Re: [v1][KVM][PATCH 1/1] kvm:ppc:booehv: direct ISI exception to
Guest

On 05/08/2013 07:40 AM, Scott Wood wrote:

On 05/07/2013 06:06:30 AM, Tiejun Chen wrote:

We also can direct ISI exception to Guest like DSI.

Signed-off-by: Tiejun Chen 
---
  arch/powerpc/kvm/booke_emulate.c |3 +++
  arch/powerpc/kvm/e500mc.c|3 ++-
  2 files changed, 5 insertions(+), 1 deletion(-)


Are you seeing a real performance improvement from this?  This will

interfere

No. But after we reduce the exit to host, shouldn't this improve
performance?


We lose some flexibility for this so it make sense only if we gain
measurable improvements.


Sounds we have much more works to do.






somewhat with using the VF bit, if we were to ever do so, since VF only

affects

Sorry, what is the VF you said?


VF stands for virtualization fault see MAS8[VF] and we may use it for 
virtualized


I almost forget this point :)


Looks KVM PPC have no this mechanism currently since I don't find MAS8_VF is 
used in kernel, right?


If I'm missing something please correct me.

Tiejun




MMIO. The hypervisor should deny execute access on pages marked with VF.
Accordingly
in this case guest ISI exceptions should be handled by the hypervisor.




___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts

2013-05-09 Thread tiejun.chen

On 05/09/2013 07:21 PM, Bhushan Bharat-R65777 wrote:




-Original Message-
From: tiejun.chen [mailto:tiejun.c...@windriver.com]
Sent: Thursday, May 09, 2013 3:48 PM
To: Bhushan Bharat-R65777
Cc: Caraman Mihai Claudiu-B02008; Wood Scott-B07421; linuxppc-
d...@lists.ozlabs.org; ag...@suse.de; kvm-...@vger.kernel.org;
k...@vger.kernel.org
Subject: Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts

On 05/09/2013 06:00 PM, Bhushan Bharat-R65777 wrote:




-Original Message-
From: tiejun.chen [mailto:tiejun.c...@windriver.com]
Sent: Thursday, May 09, 2013 3:15 PM
To: Bhushan Bharat-R65777
Cc: Caraman Mihai Claudiu-B02008; Wood Scott-B07421; linuxppc-
d...@lists.ozlabs.org; ag...@suse.de; kvm-...@vger.kernel.org;
k...@vger.kernel.org
Subject: Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable
interrupts

On 05/09/2013 04:23 PM, Bhushan Bharat-R65777 wrote:




-Original Message-
From: Linuxppc-dev [mailto:linuxppc-dev-
bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of
bounces+Caraman
Mihai Claudiu-B02008
Sent: Wednesday, May 08, 2013 6:44 PM
To: Wood Scott-B07421; tiejun.chen
Cc: linuxppc-dev@lists.ozlabs.org; ag...@suse.de;
kvm-...@vger.kernel.org; k...@vger.kernel.org
Subject: RE: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable
interrupts


This only disable soft interrupt for kvmppc_restart_interrupt()
that restarts interrupts if they were meant for the host:

a. SOFT_DISABLE_INTS() only for BOOKE_INTERRUPT_EXTERNAL |
BOOKE_INTERRUPT_DECREMENTER | BOOKE_INTERRUPT_DOORBELL


Those aren't the only exceptions that can end up going to the host.
We could get a TLB miss that results in a heavyweight MMIO exit, etc.


And shouldn't we handle kvmppc_restart_interrupt() like the
original HOST flow?

#define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr,
ack)   \

START_EXCEPTION(label); \
   NORMAL_EXCEPTION_PROLOG(trapnum, intnum,
PROLOG_ADDITION_MASKABLE)\
   EXCEPTION_COMMON(trapnum, PACA_EXGEN,
*INTS_DISABLE*) \
...


Could you elaborate on what you mean?


I think Tiejun was saying that host has flags and replays only
EE/DEC/DBELL interrupts. There is special macro
masked_interrupt_book3e in those exception handlers that sets paca-

irq_happened.


The list of replied interrupts is limited to asynchronous
noncritical interrupts which can be masked by MSR[EE] (therefore no TLB

miss).

Now on KVM book3e we don't want to put them in the irq_happened
lazy state but rather to execute them directly, so there is no
reason for exception handling symmetry between host and guest.



Another Question:

The case is:



Actually in the case GS=1 even if EE=0, EXT/DEC/DBELL still occur as I

recall.



Case 1)
-> Local_irq_disable()  will set soft_enabled = 0
-> Now Externel interrupt happens, there we set PACA_IRQ_EE in
irq_happened,

Also clears EE in SRR1 and rfi. So interrupts are hard disabled. No
more other interrupt gated by MSR.EE can happen. Looks like the idea
here is to not let a device keep on inserting interrupt till the
interrupt condition on device is cleared, right?

I don't understand "the interrupt condition on device is cleared" here.

I think regardless if you clear the device interrupt status, the
system still receive a pending interrupt once EE or GS = 1.


Once yes, but I think to avoid flood of device interrupt we disable MSR.EE

when soft-disabled.

But we neither ACK nor send EOI to that irq in the interrupt controller, so that
should be in pending state.






-> local_irq_enable() - This checks that irq_happened is set, and
replays


ret_from_except also check to replay.



Now the case 2)
Case 2)
-> Local_irq_disable()  will set soft_enabled = 0
-> Now DEC interrupt happens. We set PACA_IRQ_DEC in
irq_happened, But do

not clear EE in SRR1 and rfi. So interrupts are not hard disabled.

-> Now say EE interrupt happens, there we set PACA_IRQ_EE in
irq_happened,

Also clears EE in SRR1 and rfi. So interrupts are hard disabled.

-> local_irq_enable() - This checks that irq_happened is set.
IIUC, it replays only one interrupt? is not it?


After anyone is replayed in arch_local_irq_restore(), we will set
soft/hard interrupt there:

set_soft_enabled(1);
__hard_irq_enable();

Then any pending interrupt can be executed now.


Do you mean that the interrupt should fire again?


I means the pending exception including external interrupt, the decrementer
exception and the doorbell exception, can trap CPU once EE=1 with
__hard_irq_enable() here. Then the kernel can handle those exception since soft
enable is also 1 now.





Additionally, ret_from_except probably check to replay all.


Local_irq_enable() will not take us to ret_from_except.


Yes. I just say ret_from_except can provide an approach to replay all :)


__replay_interrupt() from arch_local_irq_enable() will ta

Re: [v1][KVM][PATCH 1/1] kvm:ppc:booehv: direct ISI exception to Guest

2013-05-09 Thread tiejun.chen

On 05/09/2013 07:34 PM, Caraman Mihai Claudiu-B02008 wrote:

VF stands for virtualization fault see MAS8[VF] and we may use it for

virtualized

Looks KVM PPC have no this mechanism currently since I don't find MAS8_VF
is
used in kernel, right?


Yes but 'we may use it' in the feature, I have a functional POC with VF.


Any IO performance to be improved with this POC?


Now we capture virtualized MMIO accesses as TLB misses (we don't write
into HW TLB guest translation outside visible memslots).


Yes.

Tiejun

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH v2 3/4] kvm/ppc: Call trace_hardirqs_on before entry

2013-05-09 Thread tiejun.chen

On 05/10/2013 11:34 AM, Bhushan Bharat-R65777 wrote:




-Original Message-
From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On Behalf Of
Scott Wood
Sent: Friday, May 10, 2013 8:40 AM
To: Alexander Graf; Benjamin Herrenschmidt
Cc: kvm-...@vger.kernel.org; k...@vger.kernel.org; 
linuxppc-dev@lists.ozlabs.org;
Wood Scott-B07421
Subject: [PATCH v2 3/4] kvm/ppc: Call trace_hardirqs_on before entry

Currently this is only being done on 64-bit.  Rather than just move it
out of the 64-bit ifdef, move it to kvm_lazy_ee_enable() so that it is
consistent with lazy ee state, and so that we don't track more host
code as interrupts-enabled than necessary.

Rename kvm_lazy_ee_enable() to kvm_fix_ee_before_entry() to reflect
that this function now has a role on 32-bit as well.

Signed-off-by: Scott Wood 
---
  arch/powerpc/include/asm/kvm_ppc.h |   11 ---
  arch/powerpc/kvm/book3s_pr.c   |4 ++--
  arch/powerpc/kvm/booke.c   |4 ++--
  arch/powerpc/kvm/powerpc.c |2 --
  4 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h
b/arch/powerpc/include/asm/kvm_ppc.h
index a5287fe..6885846 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -394,10 +394,15 @@ static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
}
  }

-/* Please call after prepare_to_enter. This function puts the lazy ee state
-   back to normal mode, without actually enabling interrupts. */
-static inline void kvmppc_lazy_ee_enable(void)
+/*
+ * Please call after prepare_to_enter. This function puts the lazy ee and irq
+ * disabled tracking state back to normal mode, without actually enabling
+ * interrupts.
+ */
+static inline void kvmppc_fix_ee_before_entry(void)
  {
+   trace_hardirqs_on();
+
  #ifdef CONFIG_PPC64
/* Only need to enable IRQs by hard enabling them after this */
local_paca->irq_happened = 0;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index bdc40b8..0b97ce4 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -890,7 +890,7 @@ program_interrupt:
local_irq_enable();
r = s;
} else {
-   kvmppc_lazy_ee_enable();
+   kvmppc_fix_ee_before_entry();
}
}

@@ -1161,7 +1161,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct
kvm_vcpu *vcpu)
if (vcpu->arch.shared->msr & MSR_FP)
kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);

-   kvmppc_lazy_ee_enable();
+   kvmppc_fix_ee_before_entry();

ret = __kvmppc_vcpu_run(kvm_run, vcpu);

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 705fc5c..eb89b83 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -673,7 +673,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu
*vcpu)
ret = s;
goto out;
}
-   kvmppc_lazy_ee_enable();
+   kvmppc_fix_ee_before_entry();


local_irq_disable() is called before kvmppc_prepare_to_enter().


In patch 4, we call hard_irq_disable() once enter kvmppc_prepare_to_enter().

Tiejun


Now we put the irq_happend and soft_enabled back to previous state without 
checking for any interrupt happened in between. If any interrupt happens in 
between, will not that be lost?

-Bharat



kvm_guest_enter();

@@ -1154,7 +1154,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
kvm_vcpu *vcpu,
local_irq_enable();
r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
} else {
-   kvmppc_lazy_ee_enable();
+   kvmppc_fix_ee_before_entry();
}
}

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6316ee3..4e05f8c 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -117,8 +117,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
kvm_guest_exit();
continue;
}
-
-   trace_hardirqs_on();
  #endif

kvm_guest_enter();
--
1.7.10.4


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev




___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH v2 4/4] kvm/ppc: IRQ disabling cleanup

2013-05-09 Thread tiejun.chen

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 4e05f8c..f8659aa 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -64,12 +64,14 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
  {
int r = 1;

-   WARN_ON_ONCE(!irqs_disabled());
+   WARN_ON(irqs_disabled());
+   hard_irq_disable();


Here we hard disable in kvmppc_prepare_to_enter(), so my comment in other patch 
about interrupt loss is no more valid.

So here
   MSR.EE = 0
   local_paca->soft_enabled = 0
   local_paca->irq_happened |= PACA_IRQ_HARD_DIS;


+
while (true) {
if (need_resched()) {
local_irq_enable();


This will make the state:
   MSR.EE = 1
   local_paca->soft_enabled = 1
   local_paca->irq_happened = PACA_IRQ_HARD_DIS;  //same as before


Why is this same the above state? local_irq_enable() can call 
__check_irq_replay() to clear PACA_IRQ_HARD_DIS.




Is that a valid state where interrupts are fully enabled and irq_happend in not 
0?


cond_resched();
-   local_irq_disable();
+   hard_irq_disable();
continue;
}

@@ -95,7 +97,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
local_irq_enable();
trace_kvm_check_requests(vcpu);
r = kvmppc_core_check_requests(vcpu);
-   local_irq_disable();
+   hard_irq_disable();
if (r > 0)
continue;
break;
@@ -108,21 +110,14 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
}

  #ifdef CONFIG_PPC64
-   /* lazy EE magic */
-   hard_irq_disable();
-   if (lazy_irq_pending()) {
-   /* Got an interrupt in between, try again */
-   local_irq_enable();
-   local_irq_disable();
-   kvm_guest_exit();
-   continue;
-   }
+   WARN_ON(lazy_irq_pending());
  #endif

kvm_guest_enter();
-   break;
+   return r;
}

+   local_irq_enable();
return r;
  }



int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
{
 int r = 0;
 WARN_ON_ONCE(!irqs_disabled());

 kvmppc_core_check_exceptions(vcpu);

 if (vcpu->requests) {
 /* Exception delivery raised request; start over */
 return 1;
 }

 if (vcpu->arch.shared->msr & MSR_WE) {
 local_irq_enable();
 kvm_vcpu_block(vcpu);
 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
 local_irq_disable();
^^^
We do not require hard_irq_disable() here?


Between kvmppc_core_prepare_to_enter() and kvmppc_prepare_to_enter(), as I 
recall Scott had some discussions with Ben earlier.


Tiejun





___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v1][KVM][PATCH 1/1] kvm:ppc:booehv: direct ISI exception to Guest

2013-05-12 Thread tiejun.chen

On 05/11/2013 03:39 AM, Alexander Graf wrote:



Am 10.05.2013 um 21:22 schrieb Scott Wood :


On 05/10/2013 12:57:33 PM, Alexander Graf wrote:

Could you guys please collect performance data during the next weeks on both 
guest-directed ISIs as well as VF MMIOs (preferably with in-kernel MMIO), so 
that we can decide on the direction that's worth going towards?


Collecting data on VF MMIO would require implementing it (or at least salvaging 
and fixing some old code), which is not a high priority at the moment.  If we 
do implement VF in the future we could always undo the direct ISI change, but 
it would still be nice to know if there's any real benefit in the first place.


Mike sounded like he had an almost working poc, which is good enough to collect 
rough numbers.


Which can the test case be adopted?

Mike,

If you already have a good case for your poc, please share that with me. Then 
I'd like to run that.


Tiejun



And yes, changes like these should always get at least basic performance 
numbers along with them, regardless of drawbacks.


Alex



FWIW, I doubt that the "more stress on HW TLB" will be significant.

-Scott




___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: SATA FSL and upstreaming

2013-05-15 Thread tiejun.chen

On 05/16/2013 01:45 PM, Benjamin Herrenschmidt wrote:

On Thu, 2013-05-16 at 14:47 +1000, Benjamin Herrenschmidt wrote:

Hi folks !

So I was trying to use my 5020ds to test some stuff today. Since I
hadn't used it in a while, I decided to "upgrade" it to the latest NOR
etc...


On another note, I can't seem to get any PCIe card recognized in any
slot...


This should depend on the RCW.

As I recall, slot 7 or slot 4 can be configured to support PCIe for P5020DS. And 
you can know this information according to RCW's README.


Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: SATA FSL and upstreaming

2013-05-15 Thread tiejun.chen

On 05/16/2013 02:09 PM, Benjamin Herrenschmidt wrote:

On Thu, 2013-05-16 at 06:05 +, Zang Roy-R61911 wrote:

I do not suggest changing the RCW. If the RCW is broken on Ben's side,
it is not easy to recover for him.
Let's check the U-boot output first.


U-Boot 2013.01-9-g7bcd7f4 (Mar 14 2013 - 14:23:16)

CPU0:  P5020E, Version: 1.0, (0x82280010)
Core:  E5500, Version: 1.0, (0x80240010)
Clock Configuration:
CPU0:2000 MHz, CPU1:2000 MHz,
CCB:800  MHz,
DDR:666.667 MHz (1333.333 MT/s data rate) (Asynchronous), LBC:100  MHz
FMAN1: 600 MHz
QMAN:  400 MHz
PME:   400 MHz
L1:D-cache 32 kB enabled
I-cache 32 kB enabled
Board: P5020DS, Sys ID: 0x1c, Sys Ver: 0x12, FPGA Ver: 0x05, vBank: 0
Reset Configuration Word (RCW):
: 0c54  1e12 
0010: d8984a01 03002000 de80 4100
0020:    1007
0030:    


I think you can use Bharat's RCW, which seems RR_HXAPNSP_0x36, then please take 
a look at this:


The RCW directories names for the p5020ds board conform to the following naming
convention:

ab_bcdefghi_j:

a = 'R' if RGMII@DTSEC4 is supported / 'N' if not available/not used
b = 'R' if RGMII@DTSEC5 is supported / 'N" if not available/not used

c = What is available in Slot 1 or SATA
d = What is available in Slot 2
e = What is available in Slot 3
f = What is available in Slot 4
g = What is available in Slot 5
h = What is available in Slot 6
i = What is available in Slot 7

For the Slots (c..i):
 'N' if not available/not used
 'P' if PCIe
 'X' if XAUI
 'R' if SRIO
 'S' if SGMII
 'H' if SATA
 'A' is AURORA

j = 'hex value of serdes protocol value'

So NR_HXAPNSP_0x36 means:
 - no RGMII@DTSEC4
 - RGMII@DTSEC5
 - SATA [Slot 1 not used]
 - XAUI on Slot 2
 - AURORA on Slot 3
 - PCIE on Slot 4
 - SGMII on Slot 6
 - PCIE on Slot 7

Slot 5 is not used, and the SERDES Protocol is 0x36.

So slot 7 and slot 4 can be as PCIe slot.

Tiejun


SERDES Reference Clocks: Bank1=100Mhz Bank2=125Mhz Bank3=125Mhz
I2C:   ready
SPI:   ready
DRAM:  Initializingusing SPD
Detected UDIMM i-DIMM
Detected UDIMM i-DIMM
2 GiB left unmapped
4 GiB (DDR3, 64-bit, CL=9, ECC on)
DDR Controller Interleaving Mode: cache line
DDR Chip-Select Interleaving Mode: CS0+CS1
Testing 0x - 0x7fff
Testing 0x8000 - 0x
Remap DDR 2 GiB left unmapped

POST memory PASSED
Flash: 128 MiB
L2:512 KB enabled
Corenet Platform Cache: 2048 KB enabled
SRIO1: disabled
SRIO2: disabled
NAND:  1024 MiB
MMC:  FSL_SDHC: 0
EEPROM: NXID v1
PCIe1: Root Complex, no link, regs @ 0xfe20
PCIe1: Bus 00 - 00
PCIe2: disabled
PCIe3: Root Complex, no link, regs @ 0xfe202000
PCIe3: Bus 01 - 01
PCIe4: disabled
In:serial
Out:   serial
Err:   serial
Net:   Initializing Fman
Fman1: Uploading microcode version 106.1.7
PHY reset timed out
PHY reset timed out
PHY reset timed out
PHY reset timed out
FM1@DTSEC1, FM1@DTSEC2, FM1@DTSEC3, FM1@DTSEC4, FM1@DTSEC5, FM1@TGEC1
Hit any key to stop autoboot:  0
=>

Cheers,
Ben.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev




___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: SATA FSL and upstreaming

2013-05-15 Thread tiejun.chen

On 05/16/2013 02:20 PM, Zang Roy-R61911 wrote:




-Original Message-
From: tiejun.chen [mailto:tiejun.c...@windriver.com]
Sent: Thursday, May 16, 2013 2:18 PM
To: Benjamin Herrenschmidt
Cc: Zang Roy-R61911; Liu Qiang-B32616; Fleming Andy-AFLEMING; linuxppc-
d...@lists.ozlabs.org; Xie Shaohui-B21989; Bhushan Bharat-R65777
Subject: Re: SATA FSL and upstreaming

On 05/16/2013 02:09 PM, Benjamin Herrenschmidt wrote:

On Thu, 2013-05-16 at 06:05 +, Zang Roy-R61911 wrote:

I do not suggest changing the RCW. If the RCW is broken on Ben's
side, it is not easy to recover for him.
Let's check the U-boot output first.


U-Boot 2013.01-9-g7bcd7f4 (Mar 14 2013 - 14:23:16)

CPU0:  P5020E, Version: 1.0, (0x82280010)
Core:  E5500, Version: 1.0, (0x80240010) Clock Configuration:
 CPU0:2000 MHz, CPU1:2000 MHz,
 CCB:800  MHz,
 DDR:666.667 MHz (1333.333 MT/s data rate) (Asynchronous),

LBC:100  MHz

 FMAN1: 600 MHz
 QMAN:  400 MHz
 PME:   400 MHz
L1:D-cache 32 kB enabled
 I-cache 32 kB enabled
Board: P5020DS, Sys ID: 0x1c, Sys Ver: 0x12, FPGA Ver: 0x05, vBank: 0
Reset Configuration Word (RCW):
 : 0c54  1e12 
 0010: d8984a01 03002000 de80 4100
 0020:    1007
 0030:    


I think you can use Bharat's RCW, which seems RR_HXAPNSP_0x36, then
please take a look at this:

Why?


I just believe Bharat should pick a proper RCW from SDK.


Ben's on board RCW protocol is 0x36, which should work for PCIe1 (slot 7) and 
PCIe3 (slot4).


Didn't you see I'm also saying to use slot 7 and slot 4?

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: SATA FSL and upstreaming

2013-05-15 Thread tiejun.chen

On 05/16/2013 02:21 PM, Benjamin Herrenschmidt wrote:

On Thu, 2013-05-16 at 14:17 +0800, tiejun.chen wrote:

I think you can use Bharat's RCW, which seems RR_HXAPNSP_0x36, then
please take
a look at this:


Ok, how do I update my RCW to bse Bharat's ?



Firstly please check which flash bank is used since we have to know where should 
be updated RCW.


What is SW7[1:4]?

Or we have another simple way in u-boot prompt:

=> md.b ffdf002c
ffdf002c: 4f 00 fe 00 39 00 00 00 00 00 00 00 00 00 00 00O...9...
...

This means we're on bank4.



Any DIP switch setting I need to be aware of ?


No.

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: SATA FSL and upstreaming

2013-05-15 Thread tiejun.chen

On 05/16/2013 02:37 PM, Benjamin Herrenschmidt wrote:

On Thu, 2013-05-16 at 06:33 +, Bhushan Bharat-R65777 wrote:

protect off 0xec00 +$filesize; erase 0xec00 +$filesize; cp.b
0x100 0xec00 $filesize


BTW, is it normal that the network in uboot is *extremely* unreliable ?


We can use serial port:

loadb   - load binary file over serial line (kermit mode)
loads   - load S-Record file over serial line
loady   - load binary file over serial line (ymodem mode)

Then please send the RCW with appropriate mode above in your terminal client.

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: SATA FSL and upstreaming

2013-05-15 Thread tiejun.chen

On 05/16/2013 02:40 PM, Benjamin Herrenschmidt wrote:

On Thu, 2013-05-16 at 14:35 +0800, tiejun.chen wrote:

On 05/16/2013 02:21 PM, Benjamin Herrenschmidt wrote:

On Thu, 2013-05-16 at 14:17 +0800, tiejun.chen wrote:

I think you can use Bharat's RCW, which seems RR_HXAPNSP_0x36, then
please take
a look at this:


Ok, how do I update my RCW to bse Bharat's ?



Firstly please check which flash bank is used since we have to know where should
be updated RCW.

What is SW7[1:4]?

Or we have another simple way in u-boot prompt:

=> md.b ffdf002c
ffdf002c: 4f 00 fe 00 39 00 00 00 00 00 00 00 00 00 00 00O...9...
...


ffdf002c: 0f 00 fe 00 00 00 00 00 00 00 00 00 00 00 00 00


This means we're on bank4.


I assume that means bank0 ?


Yes, RCW should be burned to 0xec00.

In u-boot prompt:

=> loady
## Ready for binary (ymodem) download to 0x0100 at 115200 bps...
C

Then send that RCW with ymodem in your terminal client.

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: SATA FSL and upstreaming

2013-05-15 Thread tiejun.chen

On 05/16/2013 02:53 PM, Benjamin Herrenschmidt wrote:

On Thu, 2013-05-16 at 06:49 +, Zang Roy-R61911 wrote:





Please also provide a RCW binary to Ben, if your guys insist updating  the RCW.


right, I just noticed it's ascii :-) That isn't going to work well...


Ben,

I already send my workable combination of u-boot and RCW privately to you, 
please take a try.


Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH v4 06/12] ARM: dove: add gigabit ethernet and mvmdio device tree nodes

2013-05-22 Thread tiejun.chen

On 05/22/2013 05:43 PM, Sebastian Hesselbarth wrote:

On 05/21/2013 07:48 PM, Andrew Lunn wrote:

On Tue, May 21, 2013 at 06:41:44PM +0200, Sebastian Hesselbarth wrote:

This patch adds orion-eth and mvmdio device tree nodes for DT enabled
Dove boards. As there is only one ethernet controller on Dove, a default
phy node is also added with a note to set its reg property on a per-board
basis.

Signed-off-by: Sebastian Hesselbarth
---

...

+ethernet-port@0 {
+device_type = "network";
+compatible = "marvell,orion-eth-port";
+reg =<0>;
+interrupts =<29>;
+/* overwrite MAC address in bootloader */
+local-mac-address = [00 00 00 00 00 00];


Hi Sebastian

Its probably a good idea to set the local administration bit in this
MAC address. i.e. first byte is 02.


Andrew,

we just need an invalid address here to trigger the default behavior of
the driver and load the MAC address from its register. As PPC binding
documentation also has all zero, I just took it.


The truth is in PPC case, often we set the real mac address with some variables 
like 'eth[x]addr' in u-boot prompt, then u-boot will parse that value to fill 
the dtb. At last the associated driver can get the actual mac address from the 
dtb. And especially for those older u-boot version, even you have to reset the 
'local-mac-address' property in dts directly with the real mac address before 
generate the dtb since the older u-boot have no this ability to fill dtb again 
before pass the kernel.


Tiejun

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc/32bit, PREEMPT:Load TI_FLAGS to check NEED_RESCHED

2013-05-26 Thread tiejun.chen

On 05/27/2013 02:27 PM, Priyanka Jain wrote:

Add instruction to load TI_FLAGS in r8

While returning from exception handling in case of PREEMPT enabled,
_TIF_NEED_RESCHED bit is checked in TI_FLAGS (thread_info flag) of current
task. Only if this bit is set, it should continue with the process of
calling preempt_schedule_irq() to schedule highest priority task if
available.

Current code assumes that r8 contains TI_FLAGS and check this for
_TIF_NEED_RESCHED, but as r8 is modified in the code which executes before


Could you elaborate this scenario? As I take a look at this path:

...
/* Clear _TIF_EMULATE_STACK_STORE flag */
lis r11,_TIF_EMULATE_STACK_STORE@h
addir5,r9,TI_FLAGS
0:  lwarx   r8,0,r5
andcr8,r8,r11
#ifdef CONFIG_IBM405_ERR77
dcbt0,r5
#endif
stwcx.  r8,0,r5
bne-0b
1:

#ifdef CONFIG_PREEMPT
/* check current_thread_info->preempt_count */
lwz r0,TI_PREEMPT(r9)
cmpwi   0,r0,0  /* if non-zero, just restore regs and return */
bne restore
andi.   r8,r8,_TIF_NEED_RESCHED
...

Where is R8 clobbered?

Tiejun


this check, r8 no longer contains the expected TI_FLAGS information.

As a result check for comparison with _TIF_NEED_RESCHED was failing even if
NEED_RESCHED bit is set in the current thread_info flag. Due to this,
preempt_schedule_irq() and in turn scheduler was not getting called even if
highest priority task is ready for execution.


Signed-off-by: Priyanka Jain 
---
  arch/powerpc/kernel/entry_32.S |1 +
  1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index d22e73e..0239c7f 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -887,6 +887,7 @@ resume_kernel:
  #ifdef CONFIG_PREEMPT
/* check current_thread_info->preempt_count */
lwz r0,TI_PREEMPT(r9)
+   lwz r8,TI_FLAGS(r9)
cmpwi   0,r0,0  /* if non-zero, just restore regs and return */
bne restore
andi.   r8,r8,_TIF_NEED_RESCHED



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc/32bit, PREEMPT:Load TI_FLAGS to check NEED_RESCHED

2013-05-27 Thread tiejun.chen

On 05/27/2013 02:55 PM, Jain Priyanka-B32167 wrote:


If we go some more lines up in the same file, the code is

resume_kernel:
 /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
 CURRENT_THREAD_INFO(r9, r1)
 lwz r8,TI_FLAGS(r9)
 andis.  r8,r8,_TIF_EMULATE_STACK_STORE@h


Okay, but could you fix this directly like:

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index e514de5..4498467 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -851,7 +851,7 @@ resume_kernel:
/* check current_thread_info, _TIF_EMULATE_STACK_STORE */
CURRENT_THREAD_INFO(r9, r1)
lwz r8,TI_FLAGS(r9)
-   andis.  r8,r8,_TIF_EMULATE_STACK_STORE@h
+   andis.  r0,r8,_TIF_EMULATE_STACK_STORE@h
beq+1f

addir8,r1,INT_FRAME_SIZE/* Get the kprobed function entry */

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: can't access PCIe card under sbc8548

2013-05-29 Thread tiejun.chen

On 05/30/2013 11:42 AM, wolfking wrote:

hi, all
   I'm doing some developing on the windriver's sbc8548 board. The kernel I
use
is 3.6.10 and the u-boot version is 2012-10. I changed the board's
configuration:
the board now boot from the 64MB SODIMM Flash (not the default 8MB on-board
Flash
memory), and the PCI clock rate is changed to 33MHZ.
   Now the trouble I am in is that: the PCI card (a NIC card rtl8139) can be
accessed OK, while the PCIe card can't work, that is, the kernel can't
access
its internal register. The kernel can correctly probe the PCIe card. its
BAR0
is a I/O mapped register, I use ioport_map to map the BAR0 to kernel's
address
space, then use ioread8/iowrite8 to access its internal register, it doesn't
work.
I analyse the ioport_map function and find it just add the input parameter
to
a fixed _IO_BASE value, below is the function:
void __iomem *ioport_map(unsigned long port, unsigned int len)
{
return (void __iomem *) (port + _IO_BASE);
}
the _IO_BASE value under sbc8548 is 0xfd7fd000, the value of ioport_map
paramenter
"port" is 0xfefff000. Obviously the add overflows, so the follow-up
operations
can't succeed. The value of "port" is got from the function
pci_resource_start.


In PPC case I/O is memory-mapped, so you should use ioremap() instead of 
ioport_map()..


Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: can't access PCIe card under sbc8548

2013-05-30 Thread tiejun.chen

On 05/30/2013 03:19 PM, wolfking wrote:

hi, tiejun.chen:
   Thanks for replying.
   I tried to use ioremap too, but it doesn't work. The ioport_map method


What happened?

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: can't access PCIe card under sbc8548

2013-05-30 Thread tiejun.chen

On 05/30/2013 05:15 PM, wolfking wrote:

hi, tiejun.chen:
   When I use ioremap, the card seems to work fine. That is, I can access


Yes, ioremap() should work for MMIO.


part of all register. My PCIe card is a rs232 expand card, it has some
standard UART register, for example the SCR(scratch register). My driver
can access the SCR(write and read) normally, but the other registers
behave odd. For example, the DLM should be 0, but it reads 5. The card
has a software reset bit, when it is set to 1, the card reset itself. When
it finished reset, this reset bit should be back to 0. But In sbc8548, when
I set this
bit, it remains high. So I guess, the area I accessed is not the PCIe card,


I suspect you're missing some load/storage sync code, so what is your R/W 
function exactly?


Tiejun

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: can't access PCIe card under sbc8548

2013-05-30 Thread tiejun.chen

On 05/30/2013 06:02 PM, wolfking wrote:

I tried several R/W functions: inb/outb and ioread8/iowrite8. The inb/outb


What about out_be32/in_be32?

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: can't access PCIe card under sbc8548

2013-05-30 Thread tiejun.chen

On 05/30/2013 03:32 PM, wolfking wrote:

(continued)
   I traced the 8139too.c when it uses pci_iomap, the pci_iomap called the
ioport_map. The difference between 8139 and my PCIe card lies in the
"port" value :
void __iomem *ioport_map(unsigned long port, unsigned int len)
{
return (void __iomem *) (port + _IO_BASE);


_IO_BASE is equal to isa_io_base. So if this is not zero, I think there's a isa 
bridge in your platform. So you can access these I/O ports based on that isa 
bridge/bus with ioreadx/iowritex.



}
   in 8139too.c, the "port" value is 0x1000; for my PCIe card, the "port"
value
is 0xfefff000. And the value is got from pci_resource_start. So you see, the


But this means the port is as memory-mapped so ioremap() should be workable in 
this case. Then out_bex/in_bex should be fine.


Tiejun

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: can't access PCIe card under sbc8548

2013-05-30 Thread tiejun.chen

On 05/31/2013 12:24 AM, Scott Wood wrote:

On 05/30/2013 07:49:48 AM, wolfking wrote:

tiejun.chen wrote
> On 05/30/2013 03:32 PM, wolfking wrote:
>> (continued)
>>I traced the 8139too.c when it uses pci_iomap, the pci_iomap called
>> the
>> ioport_map. The difference between 8139 and my PCIe card lies in the
>> "port" value :
>> void __iomem *ioport_map(unsigned long port, unsigned int len)
>> {
>> return (void __iomem *) (port + _IO_BASE);
>
> _IO_BASE is equal to isa_io_base. So if this is not zero, I think there's
> a isa
> bridge in your platform. So you can access these I/O ports based on that
> isa
> bridge/bus with ioreadx/iowritex.
>
> I tried ioread8/iowriet8 after ioremap, it doesn't work
>
>> }
>>in 8139too.c, the "port" value is 0x1000; for my PCIe card, the "port"
>> value
>> is 0xfefff000. And the value is got from pci_resource_start. So you see,
>> the
>
> But this means the port is as memory-mapped


Are you sure?  It could mean that it's on a non-primary bus and I/O for this bus


We can take a further look at '/proc/ioports', '/proc/iomem' and other message.


is mapped at a lower address than the primary.  Just because the addition is
wrapping around doesn't mean it's wrong.


> so ioremap() should be workable in this case. Then out_bex/in_bex should be
fine.


ioremap() and out_bex/in_bex are not appropriate for PCI I/O regions (and
presumably that's what it is, if pci_iomap is calling ioport_map).  Big-endian
is not appropriate for PCI in any case.


Yes, I should correct that PCI interprets all accesses as little-endian.

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: fsqrt

2013-06-07 Thread tiejun.chen

On 06/07/2013 03:41 PM, Benjamin Herrenschmidt wrote:

Another question...

Do you guys happen to have a patch to emulate fsqrt in the kernel ?


Seems this is already emulated:

arch/powerpc/math-emu/fsqrt.c

You can enable CONFIG_MATH_EMULATION to try.

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: fsqrt

2013-06-07 Thread tiejun.chen

On 06/07/2013 04:53 PM, Benjamin Herrenschmidt wrote:

On Fri, 2013-06-07 at 15:46 +0800, tiejun.chen wrote:

On 06/07/2013 03:41 PM, Benjamin Herrenschmidt wrote:

Another question...

Do you guys happen to have a patch to emulate fsqrt in the kernel ?


Seems this is already emulated:

arch/powerpc/math-emu/fsqrt.c

You can enable CONFIG_MATH_EMULATION to try.


Is math emu expected to work at all on top of a real FPU ? I though it
didn't ... maybe I'm wrong.


As I understand often the real FPU can't support all float instructions, so we 
have to enable this to emulate those unsupported float instructions in that 
scenario.


Tiejun

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 4/6 v5] KVM: PPC: exit to user space on "ehpriv" instruction

2013-06-25 Thread tiejun.chen

On 06/26/2013 01:42 PM, Bharat Bhushan wrote:

"ehpriv" instruction is used for setting software breakpoints
by user space. This patch adds support to exit to user space
with "run->debug" have relevant information.

As this is the first point we are using run->debug, also defined
the run->debug structure.

Signed-off-by: Bharat Bhushan 
---
  arch/powerpc/include/asm/disassemble.h |4 
  arch/powerpc/include/uapi/asm/kvm.h|   21 +
  arch/powerpc/kvm/e500_emulate.c|   27 +++
  3 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/disassemble.h 
b/arch/powerpc/include/asm/disassemble.h
index 9b198d1..856f8de 100644
--- a/arch/powerpc/include/asm/disassemble.h
+++ b/arch/powerpc/include/asm/disassemble.h
@@ -77,4 +77,8 @@ static inline unsigned int get_d(u32 inst)
return inst & 0x;
  }

+static inline unsigned int get_oc(u32 inst)
+{
+   return (inst >> 11) & 0x7fff;
+}
  #endif /* __ASM_PPC_DISASSEMBLE_H__ */
diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index 0fb1a6e..ded0607 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -269,7 +269,24 @@ struct kvm_fpu {
__u64 fpr[32];
  };

+/*
+ * Defines for h/w breakpoint, watchpoint (read, write or both) and
+ * software breakpoint.
+ * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
+ * for KVM_DEBUG_EXIT.
+ */
+#define KVMPPC_DEBUG_NONE  0x0
+#define KVMPPC_DEBUG_BREAKPOINT(1UL << 1)
+#define KVMPPC_DEBUG_WATCH_WRITE   (1UL << 2)
+#define KVMPPC_DEBUG_WATCH_READ(1UL << 3)
  struct kvm_debug_exit_arch {
+   __u64 address;
+   /*
+* exiting to userspace because of h/w breakpoint, watchpoint
+* (read, write or both) and software breakpoint.
+*/
+   __u32 status;
+   __u32 reserved;
  };

  /* for KVM_SET_GUEST_DEBUG */
@@ -281,10 +298,6 @@ struct kvm_guest_debug_arch {
 * Type denotes h/w breakpoint, read watchpoint, write
 * watchpoint or watchpoint (both read and write).
 */
-#define KVMPPC_DEBUG_NONE  0x0
-#define KVMPPC_DEBUG_BREAKPOINT(1UL << 1)
-#define KVMPPC_DEBUG_WATCH_WRITE   (1UL << 2)
-#define KVMPPC_DEBUG_WATCH_READ(1UL << 3)
__u32 type;
__u32 reserved;
} bp[16];
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index b10a012..dab9d07 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -26,6 +26,8 @@
  #define XOP_TLBRE   946
  #define XOP_TLBWE   978
  #define XOP_TLBILX  18
+#define XOP_EHPRIV  270
+#define EHPRIV_OC_DEBUG 0


As I think the case, "OC = 0", is a bit specific since IIRC, if the OC
operand is omitted, its equal 0 by default. So I think we should start this OC 
value from 1 or other magic number.


And if possible, we'd better add some comments to describe this to make the OC 
definition readable.


Tiejun



  #ifdef CONFIG_KVM_E500MC
  static int dbell2prio(ulong param)
@@ -82,6 +84,26 @@ static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, 
int rb)
  }
  #endif

+static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+  unsigned int inst, int *advance)
+{
+   int emulated = EMULATE_DONE;
+
+   switch (get_oc(inst)) {
+   case EHPRIV_OC_DEBUG:
+   run->exit_reason = KVM_EXIT_DEBUG;
+   run->debug.arch.address = vcpu->arch.pc;
+   run->debug.arch.status = 0;
+   kvmppc_account_exit(vcpu, DEBUG_EXITS);
+   emulated = EMULATE_EXIT_USER;
+   *advance = 0;
+   break;
+   default:
+   emulated = EMULATE_FAIL;
+   }
+   return emulated;
+}
+
  int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 unsigned int inst, int *advance)
  {
@@ -130,6 +152,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
emulated = kvmppc_e500_emul_tlbivax(vcpu, ea);
break;

+   case XOP_EHPRIV:
+   emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst,
+  advance);
+   break;
+
default:
emulated = EMULATE_FAIL;
}



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 4/6 v5] KVM: PPC: exit to user space on "ehpriv" instruction

2013-06-26 Thread tiejun.chen

On 06/26/2013 04:44 PM, Bhushan Bharat-R65777 wrote:




-Original Message-
From: tiejun.chen [mailto:tiejun.c...@windriver.com]
Sent: Wednesday, June 26, 2013 12:25 PM
To: Bhushan Bharat-R65777
Cc: kvm-...@vger.kernel.org; k...@vger.kernel.org; ag...@suse.de; Wood Scott-
B07421; b...@kernel.crashing.org; linuxppc-dev@lists.ozlabs.org; linux-
ker...@vger.kernel.org; mi...@neuling.org; Bhushan Bharat-R65777
Subject: Re: [PATCH 4/6 v5] KVM: PPC: exit to user space on "ehpriv" instruction

On 06/26/2013 01:42 PM, Bharat Bhushan wrote:

"ehpriv" instruction is used for setting software breakpoints
by user space. This patch adds support to exit to user space
with "run->debug" have relevant information.

As this is the first point we are using run->debug, also defined
the run->debug structure.

Signed-off-by: Bharat Bhushan 
---
   arch/powerpc/include/asm/disassemble.h |4 
   arch/powerpc/include/uapi/asm/kvm.h|   21 +
   arch/powerpc/kvm/e500_emulate.c|   27 +++
   3 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/disassemble.h

b/arch/powerpc/include/asm/disassemble.h

index 9b198d1..856f8de 100644
--- a/arch/powerpc/include/asm/disassemble.h
+++ b/arch/powerpc/include/asm/disassemble.h
@@ -77,4 +77,8 @@ static inline unsigned int get_d(u32 inst)
return inst & 0x;
   }

+static inline unsigned int get_oc(u32 inst)
+{
+   return (inst >> 11) & 0x7fff;
+}
   #endif /* __ASM_PPC_DISASSEMBLE_H__ */
diff --git a/arch/powerpc/include/uapi/asm/kvm.h

b/arch/powerpc/include/uapi/asm/kvm.h

index 0fb1a6e..ded0607 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -269,7 +269,24 @@ struct kvm_fpu {
__u64 fpr[32];
   };

+/*
+ * Defines for h/w breakpoint, watchpoint (read, write or both) and
+ * software breakpoint.
+ * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
+ * for KVM_DEBUG_EXIT.
+ */
+#define KVMPPC_DEBUG_NONE  0x0
+#define KVMPPC_DEBUG_BREAKPOINT(1UL << 1)
+#define KVMPPC_DEBUG_WATCH_WRITE   (1UL << 2)
+#define KVMPPC_DEBUG_WATCH_READ(1UL << 3)
   struct kvm_debug_exit_arch {
+   __u64 address;
+   /*
+* exiting to userspace because of h/w breakpoint, watchpoint
+* (read, write or both) and software breakpoint.
+*/
+   __u32 status;
+   __u32 reserved;
   };

   /* for KVM_SET_GUEST_DEBUG */
@@ -281,10 +298,6 @@ struct kvm_guest_debug_arch {
 * Type denotes h/w breakpoint, read watchpoint, write
 * watchpoint or watchpoint (both read and write).
 */
-#define KVMPPC_DEBUG_NONE  0x0
-#define KVMPPC_DEBUG_BREAKPOINT(1UL << 1)
-#define KVMPPC_DEBUG_WATCH_WRITE   (1UL << 2)
-#define KVMPPC_DEBUG_WATCH_READ(1UL << 3)
__u32 type;
__u32 reserved;
} bp[16];
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index b10a012..dab9d07 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -26,6 +26,8 @@
   #define XOP_TLBRE   946
   #define XOP_TLBWE   978
   #define XOP_TLBILX  18
+#define XOP_EHPRIV  270
+#define EHPRIV_OC_DEBUG 0


As I think the case, "OC = 0", is a bit specific since IIRC, if the OC
operand is omitted, its equal 0 by default. So I think we should start this OC
value from 1 or other magic number.


ehpriv instruction is defined to be used as:
ehpriv OC // where OC can be 0,1, ... n
and in extended for it can be used as
ehpriv // With no OC, and here it assumes OC = 0
So OC = 0 is not specific but "ehpriv" is same as "ehpriv 0".


Yes, this is just what I mean.



I do not think of any special reason to reserve "ehpriv" and "ehpriv 0".


So I still prefer we can reserve the 'ehpriv' without OC operand as one simple 
approach to test or develop something for KVM quickly because its really 
convenient to trap into the hypervisor only with one 'ehpriv' instruction easily.


But I have no further objection if you guys are fine to this ;-)

Tiejun



Thanks
-Bharat



And if possible, we'd better add some comments to describe this to make the OC
definition readable.

Tiejun



   #ifdef CONFIG_KVM_E500MC
   static int dbell2prio(ulong param)
@@ -82,6 +84,26 @@ static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu,

int rb)

   }
   #endif

+static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu

*vcpu,

+  unsigned int inst, int *advance)
+{
+   int emulated = EMULATE_DONE;
+
+   switch (get_oc(inst)) {
+   case EHPRIV_OC_DEBUG:
+   run-&

Re: [v2][PATCH 1/7] powerpc/book3e: support CONFIG_RELOCATABLE

2013-07-09 Thread tiejun.chen

On 07/02/2013 01:00 PM, Bhushan Bharat-R65777 wrote:




-Original Message-
From: Linuxppc-dev [mailto:linuxppc-dev-
bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of Tiejun Chen
Sent: Thursday, June 20, 2013 1:23 PM
To: b...@kernel.crashing.org
Cc: linuxppc-dev@lists.ozlabs.org; linux-ker...@vger.kernel.org
Subject: [v2][PATCH 1/7] powerpc/book3e: support CONFIG_RELOCATABLE

book3e is different with book3s since 3s includes the exception
vectors code in head_64.S as it relies on absolute addressing
which is only possible within this compilation unit. So we have
to get that label address with got.

And when boot a relocated kernel, we should reset ipvr properly again
after .relocate.

Signed-off-by: Tiejun Chen 
---


[snip]


int *src, *dest;
unsigned long length;
+#ifdef CONFIG_PPC_BOOK3E
+   extern char interrupt_end_book3e[];
+#endif


Cannot we do this in arch/powerpc/kernel/asm/sections.h



if (PHYSICAL_START == 0)
return;

src = (int *)(KERNELBASE + PHYSICAL_START);
dest = (int *)KERNELBASE;
+#ifdef CONFIG_PPC_BOOK3E
+   length = (interrupt_end_book3e - _stext) / sizeof(int);
+#else
length = (__end_interrupts - _stext) / sizeof(int);
+#endif


can we keep same name in books and booke; __end_interrupts ? this way we can 
avoid such #ifdefs


Yes, I think I can simplify this as you pointed :)

Thanks,

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v2][PATCH 2/7] book3e/kexec/kdump: enable kexec for kernel

2013-07-09 Thread tiejun.chen

On 07/02/2013 01:17 PM, Bhushan Bharat-R65777 wrote:




-Original Message-
From: Linuxppc-dev [mailto:linuxppc-dev-
bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of Tiejun Chen
Sent: Thursday, June 20, 2013 1:23 PM
To: b...@kernel.crashing.org
Cc: linuxppc-dev@lists.ozlabs.org; linux-ker...@vger.kernel.org
Subject: [v2][PATCH 2/7] book3e/kexec/kdump: enable kexec for kernel

We need to active KEXEC for book3e and bypass or convert non-book3e stuff
in kexec coverage.

Signed-off-by: Tiejun Chen 
---
  arch/powerpc/Kconfig   |2 +-
  arch/powerpc/kernel/machine_kexec_64.c |6 ++
  arch/powerpc/kernel/misc_64.S  |6 ++
  3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c33e3ad..6ecf3c9 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -364,7 +364,7 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE

  config KEXEC
bool "kexec system call"
-   depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP))
+   depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP)) || PPC_BOOK3E
help
  kexec is a system call that implements the ability to shutdown your
  current kernel, and to start another kernel.  It is like a reboot
diff --git a/arch/powerpc/kernel/machine_kexec_64.c
b/arch/powerpc/kernel/machine_kexec_64.c
index 611acdf..ef39271 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -33,6 +33,7 @@
  int default_machine_kexec_prepare(struct kimage *image)
  {
int i;
+#ifndef CONFIG_PPC_BOOK3E
unsigned long begin, end;   /* limits of segment */
unsigned long low, high;/* limits of blocked memory range */
struct device_node *node;
@@ -41,6 +42,7 @@ int default_machine_kexec_prepare(struct kimage *image)

if (!ppc_md.hpte_clear_all)
return -ENOENT;
+#endif


Do we really need this function for book3e? can we have a separate function 
rather than multiple confusing ifdef?


I prefer we have a separate function to book3e.

Thanks

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v2][PATCH 4/7] book3e/kexec/kdump: introduce a kexec kernel flag

2013-07-09 Thread tiejun.chen

On 07/02/2013 01:37 PM, Bhushan Bharat-R65777 wrote:




-Original Message-
From: Linuxppc-dev [mailto:linuxppc-dev-
bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of Tiejun Chen
Sent: Thursday, June 20, 2013 1:23 PM
To: b...@kernel.crashing.org
Cc: linuxppc-dev@lists.ozlabs.org; linux-ker...@vger.kernel.org
Subject: [v2][PATCH 4/7] book3e/kexec/kdump: introduce a kexec kernel flag

We need to introduce a flag to indicate we're already running
a kexec kernel then we can go proper path. For example, We
shouldn't access spin_table from the bootloader to up any secondary
cpu for kexec kernel, and kexec kernel already know how to jump to
generic_secondary_smp_init.

Signed-off-by: Tiejun Chen 
---


[snip]


+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -150,6 +150,9 @@ static int __cpuinit smp_85xx_kick_cpu(int nr)
int hw_cpu = get_hard_smp_processor_id(nr);
int ioremappable;
int ret = 0;
+#if defined(CONFIG_KEXEC) || defined(CONFIG_CRASH_DUMP)
+   unsigned long *ptr;
+#endif


What about if we can remove the ifdef around *ptr ...



WARN_ON(nr < 0 || nr >= NR_CPUS);
WARN_ON(hw_cpu < 0 || hw_cpu >= NR_CPUS);
@@ -238,11 +241,22 @@ out:
  #else
smp_generic_kick_cpu(nr);

+#if defined(CONFIG_KEXEC) || defined(CONFIG_CRASH_DUMP)
+   ptr  = (unsigned long *)((unsigned long)&__run_at_kexec);


... #endif here ...


+   /* We shouldn't access spin_table from the bootloader to up any
+* secondary cpu for kexec kernel, and kexec kernel already
+* know how to jump to generic_secondary_smp_init.
+*/
+   if (!*ptr) {
+#endif


... remove #endif ...


flush_spin_table(spin_table);
out_be32(&spin_table->pir, hw_cpu);
out_be64((u64 *)(&spin_table->addr_h),
  __pa((u64)*((unsigned long long *)generic_secondary_smp_init)));
flush_spin_table(spin_table);
+#if defined(CONFIG_KEXEC) || defined(CONFIG_CRASH_DUMP)
+   }
+#endif


--- remove above 3 lines


I'd like to try to address your comments next version.

Thanks

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v2][PATCH 1/7] powerpc/book3e: support CONFIG_RELOCATABLE

2013-07-09 Thread tiejun.chen

On 07/03/2013 07:52 PM, Sethi Varun-B16395 wrote:




-Original Message-
From: Linuxppc-dev [mailto:linuxppc-dev-
bounces+varun.sethi=freescale@lists.ozlabs.org] On Behalf Of Tiejun
Chen
Sent: Thursday, June 20, 2013 1:23 PM
To: b...@kernel.crashing.org
Cc: linuxppc-dev@lists.ozlabs.org; linux-ker...@vger.kernel.org
Subject: [v2][PATCH 1/7] powerpc/book3e: support CONFIG_RELOCATABLE

book3e is different with book3s since 3s includes the exception vectors
code in head_64.S as it relies on absolute addressing which is only
possible within this compilation unit. So we have to get that label
address with got.

And when boot a relocated kernel, we should reset ipvr properly again
after .relocate.

Signed-off-by: Tiejun Chen 
---
  arch/powerpc/include/asm/exception-64e.h |8 
  arch/powerpc/kernel/exceptions-64e.S |   15 ++-
  arch/powerpc/kernel/head_64.S|   22 ++
  arch/powerpc/lib/feature-fixups.c|7 +++
  4 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/exception-64e.h
b/arch/powerpc/include/asm/exception-64e.h
index 51fa43e..89e940d 100644
--- a/arch/powerpc/include/asm/exception-64e.h
+++ b/arch/powerpc/include/asm/exception-64e.h
@@ -214,10 +214,18 @@ exc_##label##_book3e:
  #define TLB_MISS_STATS_SAVE_INFO_BOLTED  #endif

+#ifndef CONFIG_RELOCATABLE
  #define SET_IVOR(vector_number, vector_offset)\
li  r3,vector_offset@l; \
ori r3,r3,interrupt_base_book3e@l;  \
mtspr   SPRN_IVOR##vector_number,r3;
+#else
+#define SET_IVOR(vector_number, vector_offset) \
+   LOAD_REG_ADDR(r3,interrupt_base_book3e);\
+   rlwinm  r3,r3,0,15,0;   \
+   ori r3,r3,vector_offset@l;  \
+   mtspr   SPRN_IVOR##vector_number,r3;
+#endif


[Sethi Varun-B16395] Please add a documentation note here.


Okay.




  #endif /* _ASM_POWERPC_EXCEPTION_64E_H */

diff --git a/arch/powerpc/kernel/exceptions-64e.S
b/arch/powerpc/kernel/exceptions-64e.S
index 645170a..4b23119 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -1097,7 +1097,15 @@ skpinv:  addir6,r6,1
/* Increment */
   * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping
   */
/* Now we branch the new virtual address mapped by this entry */
+#ifdef CONFIG_RELOCATABLE
+   /* We have to find out address from lr. */
+   bl  1f  /* Find our address */
+1: mflrr6
+   addir6,r6,(2f - 1b)
+   tovirt(r6,r6)
+#else
LOAD_REG_IMMEDIATE(r6,2f)
+#endif
lis r7,MSR_KERNEL@h
ori r7,r7,MSR_KERNEL@l
mtspr   SPRN_SRR0,r6
@@ -1348,9 +1356,14 @@ _GLOBAL(book3e_secondary_thread_init)
mflrr28
b   3b

-_STATIC(init_core_book3e)
+_GLOBAL(init_core_book3e)
/* Establish the interrupt vector base */
+#ifdef CONFIG_RELOCATABLE
+   tovirt(r2,r2)
+   LOAD_REG_ADDR(r3, interrupt_base_book3e) #else
LOAD_REG_IMMEDIATE(r3, interrupt_base_book3e)
+#endif
mtspr   SPRN_IVPR,r3
sync
blr

[Sethi Varun-B16395] Please add a documentation note here as well.


Okay.




diff --git a/arch/powerpc/kernel/head_64.S
b/arch/powerpc/kernel/head_64.S index b61363d..0942f3a 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -414,12 +414,22 @@ _STATIC(__after_prom_start)
/* process relocations for the final address of the kernel */
lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */
sldir25,r25,32
+#if defined(CONFIG_PPC_BOOK3E)
+   tovirt(r26,r26) /* on booke, we already run at
PAGE_OFFSET */
+#endif
lwz r7,__run_at_load-_stext(r26)
+#if defined(CONFIG_PPC_BOOK3E)
+   tophys(r26,r26) /* Restore for the remains. */
+#endif
cmplwi  cr0,r7,1/* flagged to stay where we are ? */
bne 1f
add r25,r25,r26
  1:mr  r3,r25
bl  .relocate
+#if defined(CONFIG_PPC_BOOK3E)
+   /* We should set ivpr again after .relocate. */
+   bl  .init_core_book3e
+#endif
  #endif


[Sethi Varun-B16395] A more detailed note over here would be useful.


Okay.




  /*
@@ -447,12 +457,24 @@ _STATIC(__after_prom_start)
   * variable __run_at_load, if it is set the kernel is treated as
relocatable
   * kernel, otherwise it will be moved to PHYSICAL_START
   */
+#if defined(CONFIG_PPC_BOOK3E)
+   tovirt(r26,r26) /* on booke, we already run at
PAGE_OFFSET */
+#endif
lwz r7,__run_at_load-_stext(r26)
+#if defined(CONFIG_PPC_BOOK3E)
+   tophys(r26,r26) /* Restore for the remains. */
+#endif
cmplwi  cr0,r7,1
bne 3f

+#ifdef CONFIG_PPC_BOOK3E
+   LOAD_REG_ADDR(r5, interrupt_end_book3e)
+   LOAD_REG_ADDR(r11, _stext)
+   sub r5,r5,r11
+#else
/* just co

Re: [v3][PATCH 1/8] powerpc/book3e: rename interrupt_end_book3e with __end_interrupts

2013-07-09 Thread tiejun.chen

On 07/10/2013 01:17 PM, Bhushan Bharat-R65777 wrote:




-Original Message-
From: Linuxppc-dev [mailto:linuxppc-dev-
bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of Tiejun Chen
Sent: Tuesday, July 09, 2013 1:33 PM
To: b...@kernel.crashing.org
Cc: linuxppc-dev@lists.ozlabs.org; linux-ker...@vger.kernel.org
Subject: [v3][PATCH 1/8] powerpc/book3e: rename interrupt_end_book3e with
__end_interrupts

We can rename 'interrupt_end_book3e' with '__end_interrupts' then book3s/book3e
can share this unique label to make sure we can use this conveniently.


I think we can be consistent with start and end names, no?


Are you saying to rename 'interrupt_base_book3e' with '__base_interrupts' here? 
But seems optional since book3s have no this similar start label, so I'd like to 
keep that as original now.


Lets listen other comments firstly.

Tiejun

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v3][PATCH 7/8] book3e/kexec/kdump: redefine VIRT_PHYS_OFFSET

2013-07-09 Thread tiejun.chen

On 07/10/2013 01:20 PM, Bhushan Bharat-R65777 wrote:




-Original Message-
From: Linuxppc-dev [mailto:linuxppc-dev-
bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of Tiejun Chen
Sent: Tuesday, July 09, 2013 1:33 PM
To: b...@kernel.crashing.org
Cc: linuxppc-dev@lists.ozlabs.org; linux-ker...@vger.kernel.org
Subject: [v3][PATCH 7/8] book3e/kexec/kdump: redefine VIRT_PHYS_OFFSET

Book3e is always aligned 1GB to create TLB so we should
use (KERNELBASE - MEMORY_START) as VIRT_PHYS_OFFSET to
get __pa/__va properly while boot kdump.

Signed-off-by: Tiejun Chen 
---
  arch/powerpc/include/asm/page.h |2 ++
  1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 988c812..5b00081 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -112,6 +112,8 @@ extern long long virt_phys_offset;
  /* See Description below for VIRT_PHYS_OFFSET */
  #ifdef CONFIG_RELOCATABLE_PPC32
  #define VIRT_PHYS_OFFSET virt_phys_offset
+#elif defined(CONFIG_PPC_BOOK3E_64)
+#define VIRT_PHYS_OFFSET (KERNELBASE - MEMORY_START)


Can you please explain this code a bit more. I am not understanding this part:)


Nothing is special, we only need to redefine this to make sure __va()/__pa() can 
work well for BOOk3E-64 in BOOKE case:


#ifdef CONFIG_BOOKE
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET))
#define __pa(x) ((unsigned long)(x) - VIRT_PHYS_OFFSET)

And the arch/powerpc/include/asm/page.h file has more descriptions inline :)

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: BUG: 32 Bit Kernel kexec hangs on P2020

2013-07-10 Thread tiejun.chen

On 07/10/2013 06:00 PM, Stefani Seibold wrote:

Am Mittwoch, den 10.07.2013, 16:48 +0800 schrieb tiejun.chen:

On 07/10/2013 04:39 PM, Stefani Seibold wrote:

Hi,

i have tried to kexec a 32 bit kernel on a Freescale P2020 dual core CPU
(e500v2, revison 5.1 - pvr 8021 1051), but Kexec will hang after the
"Bye!".

The host and the kexec kernel are the same, based on the current kernel
version 3.10.

I have tried it with kexec-tools 2.0.4.git released 30 June 2013.

Invoking kexec with

kexec --command-line "1 maxcpus=1 noirqdistrib reset_devices $(cat 
/proc/cmdline)" -t elf-ppc --dtb=rs2020.dtb \
--reuse-node="/cpus/PowerPC,P2020@0/timebase-frequency" \
--reuse-node="/cpus/PowerPC,P2020@0/bus-frequency" \
--reuse-node="/cpus/PowerPC,P2020@0/clock-frequency" \
--reuse-node="/cpus/PowerPC,P2020@0/next-level-cache" \
--reuse-node="/cpus/PowerPC,P2020@1/timebase-frequency" \
--reuse-node="/cpus/PowerPC,P2020@1/bus-frequency" \
--reuse-node="/cpus/PowerPC,P2020@1/clock-frequency" \
--reuse-node="/cpus/PowerPC,P2020@1/next-level-cache" \
--reuse-node="/cpus/PowerPC,P2020@1/cpu-release-addr" \
--reuse-node="/cpus/PowerPC,P2020@1/enable-method" \
--reuse-node="/soc@ffe0/bus-frequency" \
--reuse-node="/soc@ffe0/serial@4500/clock-frequency" \
--reuse-node="/soc@ffe0/ethernet@24000/local-mac-address" \
-d -l -x vmlinux
kexec -e


Could you try again with one simple command like,

kexec -l vmlinux --append="`cat /proc/cmdline`";kexec -e



Great, this works.

I did more test and the issue for the fail is the passing of the device
tree. But the device tree is exact the same as the previous used one.

Is passing a new device tree not allowed?


No, I think this is not interdicted.

I suspect you probably don't pass a proper dtb, so you can take a further test 
to figure out this problem.


For example, in the u-boot prompt, you can use the fdt command to get last dtb 
after that board setup from u-boot, then pass that directly to try.


Tiejun

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v1][PATCH 1/1] KVM: PPC: disable preemption when using hard_irq_disable()

2013-07-10 Thread tiejun.chen

On 07/11/2013 03:15 AM, Scott Wood wrote:

On 07/10/2013 01:02:19 AM, Tiejun Chen wrote:

We should ensure the preemption cannot occur while calling get_paca()
insdide hard_irq_disable(), otherwise the paca_struct may be the
wrong one just after. And btw, we may update timing stats in this case.


The soft-ee mechanism depends on accessing the PACA directly via r13 to avoid
this.  We probably should be using inline asm to read was_enabled rather than


Yes.


hoping the compiler doesn't do anything silly.


Do you recommend I should directly replace get_paca() with local_paca inside 
hard_irq_disable()?


#define hard_irq_disable()  do {\
u8 _was_enabled = get_paca()->soft_enabled; \

->   u8 _was_enabled = local_paca->soft_enabled;

But is this safe for all scenarios?

Tiejun

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 6/6 v2] kvm: powerpc: use caching attributes as per linux pte

2013-08-01 Thread tiejun.chen

On 08/01/2013 07:12 PM, Bharat Bhushan wrote:

KVM uses same WIM tlb attributes as the corresponding qemu pte.
For this we now search the linux pte for the requested page and
get these cache caching/coherency attributes from pte.

Signed-off-by: Bharat Bhushan 
---
v1->v2
  - Use Linux pte for wimge rather than RAM/no-RAM mechanism

  arch/powerpc/include/asm/kvm_host.h |2 +-
  arch/powerpc/kvm/booke.c|2 +-
  arch/powerpc/kvm/e500.h |8 +---
  arch/powerpc/kvm/e500_mmu_host.c|   31 ++-
  4 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 3328353..583d405 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -535,6 +535,7 @@ struct kvm_vcpu_arch {
  #endif
gpa_t paddr_accessed;
gva_t vaddr_accessed;
+   pgd_t *pgdir;

u8 io_gpr; /* GPR used as IO source/target */
u8 mmio_is_bigendian;
@@ -592,7 +593,6 @@ struct kvm_vcpu_arch {
struct list_head run_list;
struct task_struct *run_task;
struct kvm_run *kvm_run;
-   pgd_t *pgdir;

spinlock_t vpa_update_lock;
struct kvmppc_vpa vpa;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 17722d8..eb2 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -697,7 +697,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu)
  #endif

kvmppc_fix_ee_before_entry();
-
+   vcpu->arch.pgdir = current->mm->pgd;
ret = __kvmppc_vcpu_run(kvm_run, vcpu);

/* No need for kvm_guest_exit. It's done in handle_exit.
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index 4fd9650..fc4b2f6 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -31,11 +31,13 @@ enum vcpu_ftr {
  #define E500_TLB_NUM   2

  /* entry is mapped somewhere in host TLB */
-#define E500_TLB_VALID (1 << 0)
+#define E500_TLB_VALID (1 << 31)
  /* TLB1 entry is mapped by host TLB1, tracked by bitmaps */
-#define E500_TLB_BITMAP(1 << 1)
+#define E500_TLB_BITMAP(1 << 30)
  /* TLB1 entry is mapped by host TLB0 */
-#define E500_TLB_TLB0  (1 << 2)
+#define E500_TLB_TLB0  (1 << 29)
+/* Lower 5 bits have WIMGE value */
+#define E500_TLB_WIMGE_MASK(0x1f)

  struct tlbe_ref {
pfn_t pfn;  /* valid only for TLB0, except briefly */
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 1c6a9d7..9b10b0b 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -64,15 +64,6 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int 
usermode)
return mas3;
  }

-static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
-{
-#ifdef CONFIG_SMP
-   return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
-#else
-   return mas2 & MAS2_ATTRIB_MASK;
-#endif
-}
-
  /*
   * writing shadow tlb entry to host TLB
   */
@@ -248,10 +239,12 @@ static inline int tlbe_is_writable(struct 
kvm_book3e_206_tlb_entry *tlbe)

  static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
 struct kvm_book3e_206_tlb_entry *gtlbe,
-pfn_t pfn)
+pfn_t pfn, int wimg)
  {
ref->pfn = pfn;
ref->flags |= E500_TLB_VALID;
+   /* Use guest supplied MAS2_G and MAS2_E */
+   ref->flags |= (gtlbe->mas2 & MAS2_ATTRIB_MASK) | wimg;

if (tlbe_is_writable(gtlbe))
kvm_set_pfn_dirty(pfn);
@@ -312,8 +305,7 @@ static void kvmppc_e500_setup_stlbe(

/* Force IPROT=0 for all guest mappings. */
stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
-   stlbe->mas2 = (gvaddr & MAS2_EPN) |
- e500_shadow_mas2_attrib(gtlbe->mas2, pr);
+   stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_WIMGE_MASK);
stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);

@@ -332,6 +324,8 @@ static inline int kvmppc_e500_shadow_map(struct 
kvmppc_vcpu_e500 *vcpu_e500,
unsigned long hva;
int pfnmap = 0;
int tsize = BOOK3E_PAGESZ_4K;
+   pte_t pte;
+   int wimg = 0;

/*
 * Translate guest physical to true physical, acquiring
@@ -437,6 +431,8 @@ static inline int kvmppc_e500_shadow_map(struct 
kvmppc_vcpu_e500 *vcpu_e500,

if (likely(!pfnmap)) {
unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
+   pgd_t *pgdir;
+
pfn = gfn_to_pfn_memslot(slot, gfn);
if (is_error_noslot_pfn(pfn)) {
printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
@@ -447,9 +443,18 @@ static inline int kvmppc_e500_shadow_map(struct 
kvmppc_

Re: [PATCH 5/6 v2] kvm: powerpc: booke: Add linux pte lookup like booke3s

2013-08-01 Thread tiejun.chen

On 08/01/2013 07:12 PM, Bharat Bhushan wrote:

KVM need to lookup linux pte for getting TLB attributes (WIMGE).
This is similar to how book3s does.
This will be used in follow-up patches.

Signed-off-by: Bharat Bhushan 
---
v1->v2
  - This is a new change in this version

  arch/powerpc/include/asm/kvm_booke.h |   73 ++
  1 files changed, 73 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_booke.h 
b/arch/powerpc/include/asm/kvm_booke.h
index d3c1eb3..903624d 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -102,4 +102,77 @@ static inline ulong kvmppc_get_msr(struct kvm_vcpu *vcpu)
  {
return vcpu->arch.shared->msr;
  }
+
+/*
+ * Lock and read a linux PTE.  If it's present and writable, atomically
+ * set dirty and referenced bits and return the PTE, otherwise return 0.
+ */
+static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing)
+{
+   pte_t pte;
+
+#ifdef PTE_ATOMIC_UPDATES
+   pte_t tmp;
+/* wait until _PAGE_BUSY is clear then set it atomically */
+#ifdef CONFIG_PPC64
+   __asm__ __volatile__ (
+   "1:ldarx   %0,0,%3\n"
+   "  andi.   %1,%0,%4\n"
+   "  bne-1b\n"
+   "  ori %1,%0,%4\n"
+   "  stdcx.  %1,0,%3\n"
+   "  bne-1b"
+   : "=&r" (pte), "=&r" (tmp), "=m" (*p)
+   : "r" (p), "i" (_PAGE_BUSY)
+   : "cc");
+#else
+__asm__ __volatile__ (
+"1: lwarx   %0,0,%3\n"
+"   andi.   %1,%0,%4\n"
+"   bne-1b\n"
+"   ori %1,%0,%4\n"
+"   stwcx.  %1,0,%3\n"
+"   bne-1b"
+: "=&r" (pte), "=&r" (tmp), "=m" (*p)
+: "r" (p), "i" (_PAGE_BUSY)
+: "cc");
+#endif
+#else
+   pte = pte_val(*p);
+#endif
+
+   if (pte_present(pte)) {
+   pte = pte_mkyoung(pte);
+   if (writing && pte_write(pte))
+   pte = pte_mkdirty(pte);
+   }
+
+   *p = pte;   /* clears _PAGE_BUSY */
+
+   return pte;
+}
+
+static inline pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
+ int writing, unsigned long *pte_sizep)


Looks this function is as same as book3s, so why not improve that as common :)

Tiejun


+{
+   pte_t *ptep;
+   unsigned long ps = *pte_sizep;
+   unsigned int shift;
+
+   ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
+   if (!ptep)
+   return __pte(0);
+   if (shift)
+   *pte_sizep = 1ul << shift;
+   else
+   *pte_sizep = PAGE_SIZE;
+
+   if (ps > *pte_sizep)
+   return __pte(0);
+   if (!pte_present(*ptep))
+   return __pte(0);
+
+   return kvmppc_read_update_linux_pte(ptep, writing);
+}
+
  #endif /* __ASM_KVM_BOOKE_H__ */



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 3/6 v2] kvm: powerpc: allow guest control "G" attribute in mas2

2013-08-01 Thread tiejun.chen

On 08/01/2013 07:12 PM, Bharat Bhushan wrote:

"G" bit in MAS2 indicates whether the page is Guarded.
There is no reason to stop guest setting  "E", so allow him.


Could we merge patch 2 and 3 into only one.

Tiejun



Signed-off-by: Bharat Bhushan 
---
v1->v2
  - no change

  arch/powerpc/kvm/e500.h |2 +-
  1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index 277cb18..4fd9650 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -117,7 +117,7 @@ static inline struct kvmppc_vcpu_e500 *to_e500(struct 
kvm_vcpu *vcpu)
  #define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
  #define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
  #define MAS2_ATTRIB_MASK \
- (MAS2_X0 | MAS2_X1 | MAS2_E)
+ (MAS2_X0 | MAS2_X1 | MAS2_E | MAS2_G)
  #define MAS3_ATTRIB_MASK \
  (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
   | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v5][PATCH 2/6] powerpc/book3e: store critical/machine/debug exception thread info

2013-10-23 Thread tiejun.chen

On 10/19/2013 06:43 AM, Scott Wood wrote:

On Thu, 2013-06-20 at 18:28 +0800, Tiejun Chen wrote:

We need to store thread info to these exception thread info like something
we already did for PPC32.

Signed-off-by: Tiejun Chen 
---
  arch/powerpc/kernel/exceptions-64e.S |   15 +++
  1 file changed, 15 insertions(+)

diff --git a/arch/powerpc/kernel/exceptions-64e.S 
b/arch/powerpc/kernel/exceptions-64e.S
index 4d8e57f..07cf657 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -67,6 +67,18 @@
std r10,PACA_##level##_STACK(r13);
  #endif

+/* Store something to exception thread info */
+#defineBOOK3E_STORE_EXC_LEVEL_THEAD_INFO(type) 
\
+   ld  r14,PACAKSAVE(r13); 
\
+   CURRENT_THREAD_INFO(r14, r14);  
\
+   CURRENT_THREAD_INFO(r15, r1);   
\
+   ld  r10,TI_FLAGS(r14);  
\
+   std r10,TI_FLAGS(r15);  
\
+   ld  r10,TI_PREEMPT(r14);
\
+   std r10,TI_PREEMPT(r15);
\
+   ld  r10,TI_TASK(r14);   
\
+   std r10,TI_TASK(r15);


Where is "type" used?



Yes, its noting now but its worth leaving this to extend something in the 
future.


BTW, no need for a BOOK3E prefix for things local to this file.



What about "EXC_LEVEL_EXCEPTION_PROLOG"? Please see next version.

Thanks,

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v5][PATCH 1/6] powerpc/book3e: load critical/machine/debug exception stack

2013-10-23 Thread tiejun.chen

On 10/19/2013 06:37 AM, Scott Wood wrote:

On Thu, 2013-06-20 at 18:28 +0800, Tiejun Chen wrote:

We always alloc critical/machine/debug check exceptions. This is
different from the normal exception. So we should load these exception
stack properly like we did for booke.


This is "booke".  Do you mean like "like we did for 32-bit"?


Yes.



And the code is already trying to load the special stack; it just
happens that it's loading from a different location than the C code
placed the stack addresses.  The changelog should point out the specific
thing that is being fixed.


Here I don't fix anything, and I just want to do the same thing as 32-bit.




Signed-off-by: Tiejun Chen 
---
  arch/powerpc/kernel/exceptions-64e.S |   49 +++---
  1 file changed, 46 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64e.S 
b/arch/powerpc/kernel/exceptions-64e.S
index 4b23119..4d8e57f 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -36,6 +36,37 @@
   */
  #define   SPECIAL_EXC_FRAME_SIZE  INT_FRAME_SIZE

+/* only on book3e */
+#define DBG_STACK_BASE dbgirq_ctx
+#define MC_STACK_BASE  mcheckirq_ctx
+#define CRIT_STACK_BASEcritirq_ctx
+
+#ifdef CONFIG_RELOCATABLE
+#define LOAD_STACK_BASE(reg, level)\
+   tovirt(r2,r2);  \
+   LOAD_REG_ADDR(reg, level##_STACK_BASE);
+#else
+#define LOAD_STACK_BASE(reg, level)\
+   LOAD_REG_IMMEDIATE(reg, level##_STACK_BASE);
+#endif
+
+#ifdef CONFIG_SMP
+#define BOOK3E_LOAD_EXC_LEVEL_STACK(level) \
+   mfspr   r14,SPRN_PIR;   \
+   slwir14,r14,3;  \
+   LOAD_STACK_BASE(r10, level);\
+   add r10,r10,r14;\
+   ld  r10,0(r10); \
+   addir10,r10,THREAD_SIZE;\
+   std r10,PACA_##level##_STACK(r13);
+#else
+#define BOOK3E_LOAD_EXC_LEVEL_STACK(level) \
+   LOAD_STACK_BASE(r10, level);\
+   ld  r10,0(r10); \
+   addir10,r10,THREAD_SIZE;\
+   std r10,PACA_##level##_STACK(r13);
+#endif


It looks like you're loading the stack from *irq_ctx, storing it in
PACA_*_stack, and then (immediately after this in the caller) loading it
back from PACA_*_STACK.  Why not just load it from *irq_ctx and get rid
of PACA_*_STACK altogether -- or change the C code to initialize the
addresses in the PACA instead, and get ird of *irq_ctx on 64-bit?


Okay, I'd like to move forward the c code, please see next version.




  /* Exception prolog code for all exceptions */
  #define EXCEPTION_PROLOG(n, intnum, type, addition)   \
mtspr   SPRN_SPRG_##type##_SCRATCH,r13; /* get spare registers */   \
@@ -68,20 +99,32 @@
  #define SPRN_GDBELL_SRR1  SPRN_GSRR1

  #define CRIT_SET_KSTACK   
\
+   andi.   r10,r11,MSR_PR; 
\
+   bne 1f; 
\
+   BOOK3E_LOAD_EXC_LEVEL_STACK(CRIT);  
\
ld  r1,PACA_CRIT_STACK(r13);\
-   subir1,r1,SPECIAL_EXC_FRAME_SIZE;
+   subir1,r1,SPECIAL_EXC_FRAME_SIZE;   
\


The caller will already check MSR_PR and override this if coming from
userspace; why do you need to check again here?


Looks this is redundant so this will be left out.

Thanks,

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v5][PATCH 3/6] book3e/kgdb: update thread's dbcr0

2013-10-23 Thread tiejun.chen

On 10/19/2013 06:57 AM, Scott Wood wrote:

On Thu, 2013-06-20 at 18:28 +0800, Tiejun Chen wrote:

gdb always need to generate a single step properly to invoke
a kgdb state. But with lazy interrupt, book3e can't always
trigger a debug exception with a single step since the current
is blocked for handling those pending exception, then we miss
that expected dbcr configuration at last to generate a debug
exception.


What do you mean by "the current is blocked"?  Could you explain more
clearly what lazy EE has to do with MSR_DE and DBCR0?



I will go another path to make sure the lazy EE doesn't affect KGDB, so please 
see next version.


Thanks,

Tiejun

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v5][PATCH 4/6] powerpc/book3e: support kgdb for kernel space

2013-10-23 Thread tiejun.chen

On 10/19/2013 06:58 AM, Scott Wood wrote:

On Thu, 2013-06-20 at 18:28 +0800, Tiejun Chen wrote:

Currently we need to skip this for supporting KGDB.


Does it need to depend on CONFIG_KGDB?  Either you've fixed the "can't
quite save properly" part, or you haven't.


I'm not 100% sure if my change is fine to other scenarios so I have to use this 
guarantee other stuff are still safe. But if you think we can remove this 
dependent, I'm happy to do :)


Thanks,

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v5][PATCH 6/6] book3e/kgdb: Fix a single stgep case of lazy IRQ

2013-10-23 Thread tiejun.chen

On 10/19/2013 07:32 AM, Scott Wood wrote:

On Thu, 2013-06-20 at 18:28 +0800, Tiejun Chen wrote:

When we're in kgdb_singlestep(), we have to work around to get
thread_info by copying from the kernel stack before calling
kgdb_handle_exception(), then copying it back afterwards.

But for PPC64, we have a lazy interrupt implementation. So after
copying thread info frome kernle stack, if we need to replay an
interrupt, we shouldn't restore that previous backup thread info
to make sure we can replay an interrupt lately with a proper
thread info.


Explain why copying it would be a problem.



This would be gone away in next version as well :)

Thanks,

Tiejun

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v5][PATCH 1/6] powerpc/book3e: load critical/machine/debug exception stack

2013-10-23 Thread tiejun.chen

On 10/19/2013 07:55 AM, Scott Wood wrote:

On Thu, 2013-06-20 at 18:28 +0800, Tiejun Chen wrote:

We always alloc critical/machine/debug check exceptions. This is
different from the normal exception. So we should load these exception
stack properly like we did for booke.

Signed-off-by: Tiejun Chen 
---
  arch/powerpc/kernel/exceptions-64e.S |   49 +++---
  1 file changed, 46 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64e.S 
b/arch/powerpc/kernel/exceptions-64e.S
index 4b23119..4d8e57f 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -36,6 +36,37 @@
   */
  #define   SPECIAL_EXC_FRAME_SIZE  INT_FRAME_SIZE

+/* only on book3e */
+#define DBG_STACK_BASE dbgirq_ctx
+#define MC_STACK_BASE  mcheckirq_ctx
+#define CRIT_STACK_BASEcritirq_ctx
+
+#ifdef CONFIG_RELOCATABLE
+#define LOAD_STACK_BASE(reg, level)\
+   tovirt(r2,r2);  \
+   LOAD_REG_ADDR(reg, level##_STACK_BASE);


Where does r2 come from here, where does it get used, and why do we need
tovirt() on book3e?



As I remember this should be covered when we boot that capture kernel in 
kexec/kdump case.


Now this is also gone away after move forward the c code.

Thanks,

Tiejun

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 1/3] powerpc/book3e: load critical/machine/debug exception stack

2012-12-20 Thread tiejun.chen

On 12/19/2012 06:10 AM, Tabi Timur-B04825 wrote:

On Thu, Oct 25, 2012 at 1:43 AM, Tiejun Chen  wrote:

We always alloc critical/machine/debug check exceptions. This is
different from the normal exception. So we should load these exception
stack properly like we did for booke.


Tiejun,

I'm a little confused by these patches, because the actual critical
exception handlers are still commented out:

/* Critical Input Interrupt */
START_EXCEPTION(critical_input);
CRIT_EXCEPTION_PROLOG(0x100, BOOKE_INTERRUPT_CRITICAL,
  PROLOG_ADDITION_NONE)
//  EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE)
//  bl  special_reg_save_crit
//  CHECK_NAPPING();
//  addir3,r1,STACK_FRAME_OVERHEAD
//  bl  .critical_exception
//  b   ret_from_crit_except
b   .

Are you working on fixing this?  I'm trying to fix it, too, but I


No, I have no approach to Crit/MC and currently I'm focus only on Debug so 
please do this as you expect :)


BTW, I send next version just now since there are something needed to 
correct/improve.


Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v1][PATCH 3/6] book3e/kgdb: update thread's dbcr0

2012-12-20 Thread tiejun.chen

On 12/21/2012 02:41 AM, Kumar Gala wrote:


On Dec 20, 2012, at 3:08 AM, Tiejun Chen wrote:


gdb always need to generate a single step properly to invoke
a kgdb state. But with lazy interrupt, book3e can't always
trigger a debug exception with a single step since the current
is blocked for handling those pending exception, then we miss
that expected dbcr configuration at last to generate a debug
exception.

So here we also update thread's dbcr0 to make sure the current
can go back with that missed dbcr0 configuration.

Signed-off-by: Tiejun Chen 
---
arch/powerpc/kernel/kgdb.c |   10 ++
1 file changed, 10 insertions(+)

diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index c470a40..516b44b 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -426,8 +426,18 @@ int kgdb_arch_handle_exception(int vector, int signo, int 
err_code,
/* set the trace bit if we're stepping */
if (remcom_in_buffer[0] == 's') {
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+#ifdef CONFIG_PPC_BOOK3E


Should this really be CONFIG_PPC64 or CONFIG_PPC_BOOK3E_64?


Yes, I think CONFIG_PPC_BOOK3E_64 is better currently. Because I didn't validate 
this on power arch since I have no a real machine :)





+   /* With lazy interrut we have to update thread dbcr0 
here
+* to make sure we can set debug properly at last to 
invoke
+* kgdb again to work well.
+*/
+   current->thread.dbcr0 =
+   mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM;
+   mtspr(SPRN_DBCR0, current->thread.dbcr0);
+#else
mtspr(SPRN_DBCR0,
  mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
+#endif


Can we code this so we don't need the #else?


I guess you're saying something like this:

---
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index c470a40..b2df42a 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -409,7 +409,7 @@ int kgdb_arch_handle_exception(int vector, int signo, int 
err_code,

   struct pt_regs *linux_regs)
 {
char *ptr = &remcom_in_buffer[1];
-   unsigned long addr;
+   unsigned long addr, dbcr0;

switch (remcom_in_buffer[0]) {
/*
@@ -426,8 +426,15 @@ int kgdb_arch_handle_exception(int vector, int signo, int 
err_code,

/* set the trace bit if we're stepping */
if (remcom_in_buffer[0] == 's') {
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-   mtspr(SPRN_DBCR0,
- mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
+   dbcr0 = mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM;
+   mtspr(SPRN_DBCR0, dbcr0);
+#ifdef CONFIG_PPC_BOOK3E_64
+   /* With lazy interrut we have to update thread dbcr0 
here
+* to make sure we can set debug properly at last to 
invoke
+* kgdb again to work well.
+*/
+   current->thread.dbcr0 = dbcr0;
+#endif
linux_regs->msr |= MSR_DE;
 #else
linux_regs->msr |= MSR_SE;
--
1.7.9.5

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [RFC][v0][PATCH 1/1] ppc64: ignore interrupts while offline

2013-01-15 Thread tiejun.chen

Please ignore this to check next version.

Tiejun

On 01/15/2013 06:31 PM, Tiejun Chen wrote:

With lazy interrupt, some implementations of hotplug will
get some interrupts even while offline, just ignore these.

Signed-off-by: Tiejun Chen 
---
  arch/powerpc/kernel/irq.c |6 ++
  1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 4f97fe3..dbca574 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -144,6 +144,12 @@ notrace unsigned int __check_irq_replay(void)
 */
unsigned char happened = local_paca->irq_happened;

+   /* Some implementations of hotplug will get some interrupts while
+* offline, just ignore these.
+*/
+   if (cpu_is_offline(smp_processor_id()))
+   return 0;
+
/* Clear bit 0 which we wouldn't clear otherwise */
local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;




___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc: kernel/kgdb.c: fix memory leakage

2013-02-26 Thread tiejun.chen

On 02/08/2013 10:41 AM, Benjamin Herrenschmidt wrote:

On Thu, 2013-01-31 at 20:04 -0600, Jason Wessel wrote:


diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 8747447..5ca82cd 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -154,12 +154,12 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs)
  static int kgdb_singlestep(struct pt_regs *regs)
  {
struct thread_info *thread_info, *exception_thread_info;
-   struct thread_info *backup_current_thread_info = \
-   (struct thread_info *)kmalloc(sizeof(struct thread_info), 
GFP_KERNEL);
+   struct thread_info *backup_current_thread_info;




Woh...  This is definitely wrong.  You have found a problem for sure,
but this is not the right way to fix it.

It is not a good idea to kmalloc while single stepping because you can
hang the kernel if you single step any operation in kmalloc().


Ok. I applied the fix because it was obviously correct vs. the previous
version of the code (ie. the kmalloc was already there)


I am in the process of going through all the kgdb mails from the last
few months while I had been away from the project, so I didn't catch
this one and I see it has upstream commit (fefd9e6f8).  I'll submit
another patch to fix this the right way and use a static variable.
This is ok to use a static variable here because this is not something
we can recursively call at a single CPU level.


But a static will be shared by all CPUs or do you mean a per-cpu one ?


If Ben prefers we not burn the memory unless kgdb is active we can
kmalloc / kfree the space we need at the time that kgdb is
initialized.  Else we can go with this patch you see below.  We'll see
what Ben desires.


What about the stack ? thread info isn't very big...



Actually booke already copy the thread_info when we enter the debug exception, 
and I will send a patch to do the same thing for book3e, so I also remove these 
copying action here then we're happy without these nasty stuff :)


Tiejun



Cheers,
Ben.


-
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index a7bc752..bb12c8b 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -151,15 +151,16 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs)
return 1;
  }

+static struct thread_info kgdb_backup_thread_info[NR_CPUS];
+
  static int kgdb_singlestep(struct pt_regs *regs)
  {
struct thread_info *thread_info, *exception_thread_info;
-   struct thread_info *backup_current_thread_info;
+   int cpu = raw_smp_processor_id();

if (user_mode(regs))
return 0;

-   backup_current_thread_info = (struct thread_info 
*)kmalloc(sizeof(struct thread_info), GFP_KERNEL);
/*
 * On Book E and perhaps other processors, singlestep is handled on
 * the critical exception stack.  This causes current_thread_info()
@@ -175,7 +176,7 @@ static int kgdb_singlestep(struct pt_regs *regs)

if (thread_info != exception_thread_info) {
/* Save the original current_thread_info. */
-   memcpy(backup_current_thread_info, exception_thread_info, 
sizeof *thread_info);
+   memcpy(&kgdb_backup_thread_info[cpu], exception_thread_info, 
sizeof *thread_info);
memcpy(exception_thread_info, thread_info, sizeof *thread_info);
}

@@ -183,9 +184,8 @@ static int kgdb_singlestep(struct pt_regs *regs)

if (thread_info != exception_thread_info)
/* Restore current_thread_info lastly. */
-   memcpy(exception_thread_info, backup_current_thread_info, 
sizeof *thread_info);
+   memcpy(exception_thread_info, &kgdb_backup_thread_info[cpu], 
sizeof *thread_info);

-   kfree(backup_current_thread_info);
return 1;
  }


-


Thanks,
Jason.




if (user_mode(regs))
return 0;

+   backup_current_thread_info = (struct thread_info 
*)kmalloc(sizeof(struct thread_info), GFP_KERNEL);
/*
 * On Book E and perhaps other processors, singlestep is handled on
 * the critical exception stack.  This causes current_thread_info()
@@ -185,6 +185,7 @@ static int kgdb_singlestep(struct pt_regs *regs)
/* Restore current_thread_info lastly. */
memcpy(exception_thread_info, backup_current_thread_info, 
sizeof *thread_info);

+   kfree(backup_current_thread_info);
return 1;
  }









___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v3][PATCH 0/6] powerpc/book3e: make kgdb to work well

2013-02-26 Thread tiejun.chen

On 02/27/2013 11:04 AM, Tiejun Chen wrote:

This patchset is used to support kgdb/gdb on book3e.

Validated on p4080ds and p5040ds with test single step and breakpoint


Please ignore this thread since looks I'm missing to CC Jason :(

Tiejun



v3:

* make work when enable CONFIG_RELOCATABLE
* fix one typo in patch,
   "powerpc/book3e: store critical/machine/debug exception thread info":
ld  r1,PACAKSAVE(r13);
 ->  ld  r14,PACAKSAVE(r13);
* remove copying the thread_info since booke and book3e always copy
   the thead_info now when we enter the debug exception, and so drop
   the v2 patch, "book3e/kgdb: Fix a single stgep case of lazy IRQ"

v2:

* Make sure we cover CONFIG_PPC_BOOK3E_64 safely
* Use LOAD_REG_IMMEDIATE() to load properly
the value of the constant expression in load debug exception stack
* Copy thread infor form the kernel stack coming from usr
* Rebase latest powerpc git tree

v1:
* Copy thread info only when we are from !user mode since we'll get kernel stack
   coming from usr directly.
* remove save/restore EX_R14/EX_R15 since DBG_EXCEPTION_PROLOG already covered
   this.
* use CURRENT_THREAD_INFO() conveniently to get thread.
* fix some typos
* add a patch to make sure gdb can generate a single step properly to invoke a
   kgdb state.
* add a patch to if we need to replay an interrupt, we shouldn't restore that
   previous backup thread info to make sure we can replay an interrupt lately
   with a proper thread info.
* rebase latest powerpc git tree

v0:
This patchset is used to support kgdb for book3e.

--
Tiejun Chen (6):
   powerpc/book3e: load critical/machine/debug exception stack
   powerpc/book3e: store critical/machine/debug exception thread info
   book3e/kgdb: update thread's dbcr0
   powerpc/book3e: support kgdb for kernel space
   kgdb/kgdbts: support ppc64
   powerpc/kgdb: remove copying the thread_info

  arch/powerpc/kernel/exceptions-64e.S |   69 --
  arch/powerpc/kernel/kgdb.c   |   41 +---
  drivers/misc/kgdbts.c|2 +
  3 files changed, 77 insertions(+), 35 deletions(-)

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev




___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH]->[PATCH v2] kgdb: Removed kmalloc returned value cast

2013-03-10 Thread tiejun.chen

On 03/11/2013 06:39 AM, Alex Grad wrote:

While at it, check kmalloc return value.

Signed-off-by: Alex Grad 
---
  arch/powerpc/kernel/kgdb.c |5 -
  1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 5ca82cd..9e81dd8 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -159,7 +159,10 @@ static int kgdb_singlestep(struct pt_regs *regs)
if (user_mode(regs))
return 0;

-   backup_current_thread_info = (struct thread_info 
*)kmalloc(sizeof(struct thread_info), GFP_KERNEL);
+   backup_current_thread_info = kmalloc(sizeof(struct thread_info), 
GFP_KERNEL);
+   if (!backup_current_thread_info)
+   return -ENOMEM;


I already send a kgdb patchset in "[v3][PATCH 6/6] powerpc/kgdb: remove copying 
the thread_info" to remove these stuff since its unnecessary to copy current 
thread_info now.


Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH]->[PATCH v2] kgdb: Removed kmalloc returned value cast

2013-03-11 Thread tiejun.chen

On 03/11/2013 09:33 AM, tiejun.chen wrote:

On 03/11/2013 06:39 AM, Alex Grad wrote:

While at it, check kmalloc return value.

Signed-off-by: Alex Grad 
---
  arch/powerpc/kernel/kgdb.c |5 -
  1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 5ca82cd..9e81dd8 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -159,7 +159,10 @@ static int kgdb_singlestep(struct pt_regs *regs)
  if (user_mode(regs))
  return 0;

-backup_current_thread_info = (struct thread_info *)kmalloc(sizeof(struct
thread_info), GFP_KERNEL);
+backup_current_thread_info = kmalloc(sizeof(struct thread_info),
GFP_KERNEL);
+if (!backup_current_thread_info)
+return -ENOMEM;


I already send a kgdb patchset in "[v3][PATCH 6/6] powerpc/kgdb: remove copying
the thread_info" to remove these stuff since its unnecessary to copy current
thread_info now.


I guess that patchset needs to be reviewed with more time and I also think I can 
leave this copying to be compatible for other platforms in the future, but we 
really should fix this problem I introduced right now. So I'd like to fix this 
with DEFINE_PER_CPU.


I send a patch, "powerpc/kgdb: use DEFINE_PER_CPU to allocate kgdb's 
thread_info", so please take a look at if that version is fine to every one.


Any comments are appreciated.

BTW, I will update that kgdb patchset with this change to resend.

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc: add a missing label in resume_kernel

2013-04-10 Thread tiejun.chen

On 04/10/2013 04:31 PM, Kevin Hao wrote:

A label 0 was missed in the patch a9c4e541 (powerpc/kprobe: Complete
kprobe and migrate exception frame). This will cause the kernel
branch to an undetermined address if there really has a conflict when
updating the thread flags.

Signed-off-by: Kevin Hao 


Acked-By: Tiejun Chen 



Cc: sta...@vger.kernel.org
---
  arch/powerpc/kernel/entry_64.S | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 256c5bf..ab079ed 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -657,7 +657,7 @@ resume_kernel:
/* Clear _TIF_EMULATE_STACK_STORE flag */
lis r11,_TIF_EMULATE_STACK_STORE@h
addir5,r9,TI_FLAGS
-   ldarx   r4,0,r5
+0: ldarx   r4,0,r5
andcr4,r4,r11
stdcx.  r4,0,r5
bne-0b



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 1/1] usb: ehci-fsl: set INCR8 mode only on MPC512x

2013-04-23 Thread tiejun.chen

Sorry, please ignore this temporarily since looks this is already covered in 
tree.

I will look further into this to send another version.

Tiejun

On 04/24/2013 10:55 AM, Tiejun Chen wrote:

commit 761bbcb7, "usb: ehci-fsl: set INCR8 mode for system bus interface
on MPC512x", introduced to fix one MPC5121e (M36P) Errata by setting
INCR8 mode for system bus interface on MPC512x, but we should make sure
this is only valid for MPC512x like other parts of this commit. Otherwise
this would issue other platforms as abnormal without this similar Errata.

Signed-off-by: Tiejun Chen 
---
  drivers/usb/host/ehci-fsl.c |   10 +-
  1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c
index d81d2fc..f4f2a7b 100644
--- a/drivers/usb/host/ehci-fsl.c
+++ b/drivers/usb/host/ehci-fsl.c
@@ -509,7 +509,15 @@ static int ehci_fsl_mpc512x_drv_resume(struct device *dev)
ehci_writel(ehci, ISIPHYCTRL_PXE | ISIPHYCTRL_PHYE,
hcd->regs + FSL_SOC_USB_ISIPHYCTRL);

-   ehci_writel(ehci, SBUSCFG_INCR8, hcd->regs + FSL_SOC_USB_SBUSCFG);
+   if (of_device_is_compatible(dev->parent->of_node,
+   "fsl,mpc5121-usb2-dr")) {
+   /*
+* set SBUSCFG:AHBBRST so that control msgs don't
+* fail when doing heavy PATA writes.
+*/
+   ehci_writel(ehci, SBUSCFG_INCR8,
+   hcd->regs + FSL_SOC_USB_SBUSCFG);
+   }

/* restore EHCI registers */
ehci_writel(ehci, pdata->pm_command, &ehci->regs->command);



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 1/1] usb: ehci-fsl: set INCR8 mode only on MPC512x

2013-04-23 Thread tiejun.chen

On 04/24/2013 01:55 PM, Anatolij Gustschin wrote:

On Wed, 24 Apr 2013 10:55:10 +0800
Tiejun Chen  wrote:


commit 761bbcb7, "usb: ehci-fsl: set INCR8 mode for system bus interface
on MPC512x", introduced to fix one MPC5121e (M36P) Errata by setting
INCR8 mode for system bus interface on MPC512x, but we should make sure
this is only valid for MPC512x like other parts of this commit. Otherwise


NAK. It is already only valid for MPC512x.


this would issue other platforms as abnormal without this similar Errata.


This setting is in the ehci_fsl_mpc512x_drv_resume() function which is
not called on other platforms.


Yes, I already notice this and also send a notification to ignore this improper 
patch immediately ;-)


Thanks,

Tiejun

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 1/1] kvm:book3e: Fix a build error

2013-04-25 Thread tiejun.chen

On 04/25/2013 08:11 PM, Caraman Mihai Claudiu-B02008 wrote:

-Original Message-
From: kvm-ppc-ow...@vger.kernel.org [mailto:kvm-ppc-
ow...@vger.kernel.org] On Behalf Of Tiejun Chen
Sent: Thursday, April 25, 2013 2:46 PM
To: ga...@kernel.crashing.org
Cc: linuxppc-dev@lists.ozlabs.org; kvm-...@vger.kernel.org;
k...@vger.kernel.org
Subject: [PATCH 1/1] kvm:book3e: Fix a build error

Commit cd66cc2e, "powerpc/85xx: Add AltiVec support for e6500", adds
support for AltiVec on a Book-E class processor, but while compiling
in the CONFIG_PPC_BOOK3E_64 and CONFIG_VIRTUALIZATION case, this
introduce the following error:

arch/powerpc/kernel/exceptions-64e.S:402: undefined reference to
`kvmppc_handler_42_0x01B'
arch/powerpc/kernel/built-in.o: In function `exc_altivec_assist_book3e':
arch/powerpc/kernel/exceptions-64e.S:424: undefined reference to
`kvmppc_handler_43_0x01B'
make: *** [vmlinux] Error 1

Looks we should add these altivec kvm handlers.

Signed-off-by: Tiejun Chen 
---
  arch/powerpc/kvm/bookehv_interrupts.S |5 +
  1 file changed, 5 insertions(+)

diff --git a/arch/powerpc/kvm/bookehv_interrupts.S
b/arch/powerpc/kvm/bookehv_interrupts.S
index e8ed7d6..fa9c78a 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -319,6 +319,11 @@ kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \
SPRN_DSRR0, SPRN_DSRR1, 0
  kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \
SPRN_CSRR0, SPRN_CSRR1, 0
+/* altivec */
+kvm_handler BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, EX_PARAMS(GEN), \
+   SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_ALTIVEC_ASSIST, EX_PARAMS(GEN), \
+   SPRN_SRR0, SPRN_SRR1, 0
  #else
  /*
   * For input register values, see
arch/powerpc/include/asm/kvm_booke_hv_asm.h
--


It seems that you are not using kvm-ppc-queue branch.


This is just used to fix a build error in powerpc.git when introduce commit 
cd66cc2e, "powerpc/85xx: Add AltiVec support for e6500", in *powerpc.git* as I 
mentioned in this patch head.




I already have a patch ready for this (and AltiVec support is work


This change don't block your AltiVec support for kvm unless you think this 
change is wrong. And especially, we always can reproduce this error with/without 
enabling AltiVec, so I also don't think this should be suspended until support 
e6500 in kvm since kvm based on e5500 should work.


Tiejun


in progress) but we need first to pull e6500 kernel patches from
Linux tree into agraf.git.

-Mike










___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v6][PATCH 0/5] powerpc/book3e: powerpc/book3e: make kgdb to work well

2013-11-18 Thread tiejun.chen

On 10/23/2013 05:31 PM, Tiejun Chen wrote:

Scott,

Tested on fsl-p5040 DS.


Scott,

Any comments to this version?

Tiejun



v6:

* rebase
* change the C code to initialize the exception stack addresses in the PACA 
instead.
* Clear the PACA_IRQ_HARD_DIS force to exit directly from this debug exception
   without replaying interrupt.
* so drop "book3e/kgdb: update thread's dbcr0".

v5:

* rebase on merge branch.

Note the original patch, [ATCH 5/7] kgdb/kgdbts: support ppc64, is already 
merged
by Jason.

v4:

* use DEFINE_PER_CPU to allocate kgdb's thread_info
* add patch 7 to make usre copy thread_info only !__check_irq_replay
* leave "andi.   r14,r11,MSR_PR" out of "#ifndef CONFIG_KGDB"
   since cr0 is still used lately.
* retest

v3:

* make work when enable CONFIG_RELOCATABLE
* fix one typo in patch,
   "powerpc/book3e: store critical/machine/debug exception thread info":
ld  r1,PACAKSAVE(r13);
 ->  ld  r14,PACAKSAVE(r13);
* remove copying the thread_info since booke and book3e always copy
   the thead_info now when we enter the debug exception, and so drop
   the v2 patch, "book3e/kgdb: Fix a single stgep case of lazy IRQ"

v2:

* Make sure we cover CONFIG_PPC_BOOK3E_64 safely
* Use LOAD_REG_IMMEDIATE() to load properly
the value of the constant expression in load debug exception stack
* Copy thread infor form the kernel stack coming from usr
* Rebase latest powerpc git tree

v1:

* Copy thread info only when we are from !user mode since we'll get kernel stack
   coming from usr directly.
* remove save/restore EX_R14/EX_R15 since DBG_EXCEPTION_PROLOG already covered
   this.
* use CURRENT_THREAD_INFO() conveniently to get thread.
* fix some typos
* add a patch to make sure gdb can generate a single step properly to invoke a
   kgdb state.
* add a patch to if we need to replay an interrupt, we shouldn't restore that
   previous backup thread info to make sure we can replay an interrupt lately
   with a proper thread info.
* rebase latest powerpc git tree

v0:

This patchset is used to support kgdb for book3e.


Tiejun Chen (5):
   powerpc/book3e: initialize crit/mc/dbg kernel stack pointers
   powerpc/book3e: store crit/mc/dbg exception thread info
   powerpc/book3e: support kgdb for kernel space
   powerpc/kgdb: use DEFINE_PER_CPU to allocate kgdb's thread_info
   powerpc/book3e/kgdb: Fix a single stgep case of lazy IRQ

  arch/powerpc/kernel/exceptions-64e.S |   26 ++
  arch/powerpc/kernel/kgdb.c   |   13 ++---
  arch/powerpc/kernel/setup_64.c   |   18 --
  3 files changed, 44 insertions(+), 13 deletions(-)

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev




___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [RFC][PATCH] powerpc/CoreNet64: compile with CONFIG_E{5, 6}500_CPU well

2013-11-21 Thread tiejun.chen

On 11/21/2013 12:41 AM, Scott Wood wrote:

On Wed, 2013-11-20 at 16:35 +0800, Tiejun Chen wrote:

CONFIG_ALTIVEC is always enabled for CoreNet64.


In the defconfig perhaps, but this isn't a generally true statement.



Yes, but I think we should avoid this probable scenario :)


And if we select CONFIG_E{5,6}500_CPU this may introduce -mcpu=e500mc64
into $CFLAGS. But Altivec and Spe options not allowed with
e500mc64, so :


Sigh.


   CC  arch/powerpc/lib/xor_vmx.o
arch/powerpc/lib/xor_vmx.c:1:0: error: AltiVec not supported in this target
make[1]: *** [arch/powerpc/lib/xor_vmx.o] Error 1
make: *** [arch/powerpc/lib] Error 2

Signed-off-by: Tiejun Chen 
---
  arch/powerpc/lib/Makefile |3 +++
  1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 95a20e1..641a77d 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -40,5 +40,8 @@ obj-y += code-patching.o
  obj-y += feature-fixups.o
  obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o

+# Altivec and Spe options not allowed with e500mc64 in GCC.
+ifeq ($(call cc-option-yn,-mcpu=e500mc64),n)
  obj-$(CONFIG_ALTIVEC) += xor_vmx.o
  CFLAGS_xor_vmx.o += -maltivec -mabi=altivec
+endif


This does not seem like the right fix.  What if GCC supports both
-mcpu=e500mc64 and -mcpu=e6500, and we're using the latter?  Or for that


I can understand what you mean, but in current kernel, -mcpu=e500mc64 should be 
excluded from -mcpu=e6500,


arch/powerpc/Makefile:

E5500_CPU := $(call cc-option,-mcpu=e500mc64,-mcpu=powerpc64)
CFLAGS-$(CONFIG_E5500_CPU) += $(E5500_CPU)
CFLAGS-$(CONFIG_E6500_CPU) += $(call cc-option,-mcpu=e6500,$(E5500_CPU))

But unfortunately, another place also use the same option,

lib/raid6/Makefile:

raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
...
ifeq ($(CONFIG_ALTIVEC),y)
altivec_flags := -maltivec -mabi=altivec
endif

Looks we have to do something in this common Makefile file as well, but it may 
be a bit ugly if still judge some cpu-specific flags...


So what about this version?

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 607acf5..872a85c 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -127,7 +127,12 @@ CFLAGS-$(CONFIG_POWER5_CPU) += $(call 
cc-option,-mcpu=power5)
 CFLAGS-$(CONFIG_POWER6_CPU) += $(call cc-option,-mcpu=power6)
 CFLAGS-$(CONFIG_POWER7_CPU) += $(call cc-option,-mcpu=power7)

+# Altivec and Spe options not allowed with e500mc64 in GCC.
+ifeq ($(CONFIG_ALTIVEC),)
 E5500_CPU := $(call cc-option,-mcpu=e500mc64,-mcpu=powerpc64)
+else
+E5500_CPU := -mcpu=powerpc64
+endif
 CFLAGS-$(CONFIG_E5500_CPU) += $(E5500_CPU)
 CFLAGS-$(CONFIG_E6500_CPU) += $(call cc-option,-mcpu=e6500,$(E5500_CPU))

Tiejun


matter, if we're using -mcpu=whatever-ibm-chip-has-this?

Plus, wouldn't you need to do something to prevent code in that file
from being called?

-Scott






___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: powerpc/hugetlb: BUG: using smp_processor_id() in preemptible

2014-01-17 Thread tiejun.chen

On 01/17/2014 05:23 PM, Nikita Yushchenko wrote:

Hi

While running LTP hugeltb tests on freescale powerpc board, I'm getting

[ 7253.637591] BUG: using smp_processor_id() in preemptible [ ]
code: hugemmap01/9048
[ 7253.637601] caller is free_hugepd_range.constprop.25+0x88/0x1a8
[ 7253.637605] CPU: 1 PID: 9048 Comm: hugemmap01 Not tainted 3.10.20-rt14+ #114
[ 7253.637606] Call Trace:
[ 7253.637617] [cb049d80] [c0007ea4] show_stack+0x4c/0x168 (unreliable)
[ 7253.637624] [cb049dc0] [c031c674] debug_smp_processor_id+0x114/0x134
[ 7253.637628] [cb049de0] [c0016d28] free_hugepd_range.constprop.25+0x88/0x1a8
[ 7253.637632] [cb049e00] [c001711c] hugetlb_free_pgd_range+0x6c/0x168
[ 7253.637639] [cb049e40] [c0117408] free_pgtables+0x12c/0x150
[ 7253.637646] [cb049e70] [c011ce38] unmap_region+0xa0/0x11c
[ 7253.637671] [cb049ef0] [c011f03c] do_munmap+0x224/0x3bc
[ 7253.637676] [cb049f20] [c011f2e0] vm_munmap+0x38/0x5c
[ 7253.637682] [cb049f40] [c000ef88] ret_from_syscall+0x0/0x3c
[ 7253.637686] --- Exception: c01 at 0xff16004

This is on 3.10 based kernel but looks like code in question did not change
since then.

Immediate reason of this dump is usage of smp_processor_id() in hugepd_free(),
which is executed in preemptible context on this path.

Perhaps need to add preempt_disable() / preempt_enable() somewhere.
But what is the proper location for these?



Could you try this?

powerpc/hugetlb: replace __get_cpu_var with get_cpu_var

Replace __get_cpu_var safely with get_cpu_var to avoid
the following call trace:

[ 7253.637591] BUG: using smp_processor_id() in preemptible [ ]
code: hugemmap01/9048
[ 7253.637601] caller is free_hugepd_range.constprop.25+0x88/0x1a8
[ 7253.637605] CPU: 1 PID: 9048 Comm: hugemmap01 Not tainted 3.10.20-rt14+ #114
[ 7253.637606] Call Trace:
[ 7253.637617] [cb049d80] [c0007ea4] show_stack+0x4c/0x168 (unreliable)
[ 7253.637624] [cb049dc0] [c031c674] debug_smp_processor_id+0x114/0x134
[ 7253.637628] [cb049de0] [c0016d28] free_hugepd_range.constprop.25+0x88/0x1a8
[ 7253.637632] [cb049e00] [c001711c] hugetlb_free_pgd_range+0x6c/0x168
[ 7253.637639] [cb049e40] [c0117408] free_pgtables+0x12c/0x150
[ 7253.637646] [cb049e70] [c011ce38] unmap_region+0xa0/0x11c
[ 7253.637671] [cb049ef0] [c011f03c] do_munmap+0x224/0x3bc
[ 7253.637676] [cb049f20] [c011f2e0] vm_munmap+0x38/0x5c
[ 7253.637682] [cb049f40] [c000ef88] ret_from_syscall+0x0/0x3c
[ 7253.637686] --- Exception: c01 at 0xff16004

Signed-off-by: Tiejun Chen
---
 arch/powerpc/mm/hugetlbpage.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index fb05b12..42779c0 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -400,12 +400,13 @@ static void hugepd_free(struct mmu_gather *tlb, void 
*hugepte)
 {
struct hugepd_freelist **batchp;

-   batchp = &__get_cpu_var(hugepd_freelist_cur);
+   batchp = &get_cpu_var(hugepd_freelist_cur);

if (atomic_read(&tlb->mm->mm_users) < 2 ||
cpumask_equal(mm_cpumask(tlb->mm),
  cpumask_of(smp_processor_id( {
kmem_cache_free(hugepte_cache, hugepte);
+put_cpu_var(hugepd_freelist_cur);
return;
}

@@ -419,6 +420,7 @@ static void hugepd_free(struct mmu_gather *tlb, void 
*hugepte)
call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback);
*batchp = NULL;
}
+put_cpu_var(hugepd_freelist_cur);
 }
 #endif

--
1.7.9.5

Tiejun



Nikita
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev




___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: KGDB panics on p2020 target

2014-01-20 Thread tiejun.chen

On 01/17/2014 03:52 PM, Arun Chandran wrote:

Hi,

I am testing kgdb on freescale p2020 target.

In target


1)
root@freescale-p2020ds:~# uname -a
Linux freescale-p2020ds 3.10.20-rt14+ #9 SMP Thu Jan 16 16:32:15 IST 2014
ppc GNU/Linux

2)
root@freescale-p2020ds:~# cat /proc/cpuinfo
processor   : 0
cpu : e500v2
clock   : 999.990008MHz
revision: 4.0 (pvr 8021 1040)
bogomips: 124.99

processor   : 1
cpu : e500v2
clock   : 999.990008MHz
revision: 4.0 (pvr 8021 1040)
bogomips: 124.99

total bogomips  : 249.99
timebase: 62499376
platform: P2020 DS
model   : fsl,P2020DS
Memory  : 768 MB

3)
freescale-p2020ds:~# echo "ttyS1,115200" >
/sys/module/kgdboc/parameters/kgdoc

4) I set up host (settings given below); Then I send   "SysRq : DEBUG"

In host
--
(gdb) target remote /dev/ttyS0
Remote debugging using /dev/ttyS0
kgdb_breakpoint () at kernel/debug/debug_core.c:1013
1013 arch_kgdb_breakpoint();
(gdb) b sys_sync
Breakpoint 1 at 0xc0167288: file fs/sync.c, line 103.
(gdb) c
Continuing.

I am able to take control in host; after that I am setting breakpoint at
"sys_sync"

In target

root@freescale-p2020ds:~# for i in 1 2 3 4 5 6 7 8 9

do
sync
done


In host
--
Breakpoint 1, sys_sync () at fs/sync.c:103
103 {
(gdb) c
Continuing.

Breakpoint is hit only one time instead of 9 times; after that target hangs.


I recommend you try upstream to take a further look at this, instead of that 
Freescale distribution. As I recall currently KGDB works well in 85xx case in ML.




Then i tried to send "SysRq : DEBUG" in target kernel panics.

I have pasted the panic below.

#
SysRq : DEBUG
Kernel panic - not syncing: Recursive entry to debugger


The kernel already trap into kgdb_handle_exception() with the debug exception 
while triggering that break point, but again you trigger another debug exception 
by SysRq. Actually KGDB can't handle such this recursive behavior, so KGDB 
always call kgdb_reenter_check() to prevent this scenario with this call trace.


static int kgdb_reenter_check(struct kgdb_state *ks)
{
unsigned long addr;

if (atomic_read(&kgdb_active) != raw_smp_processor_id())
return 0;
...

if (exception_level > 1) {
dump_stack();
panic("Recursive entry to debugger");
}


Tiejun  


CPU: 1 PID: 2266 Comm: cron Not tainted 3.10.20-rt14+ #6
Call Trace:
[effe5d10] [c0008060] show_stack+0x4c/0x168 (unreliable)
[effe5d50] [c0588878] panic+0xe4/0x224
[effe5da0] [c00b2cbc] kgdb_handle_exception+0x1d4/0x1f8
[effe5df0] [c0010038] kgdb_handle_breakpoint+0x4c/0x80
[effe5e00] [c057e7e0] program_check_exception+0x10c/0x264
[effe5e10] [c000f660] ret_from_except_full+0x0/0x4c
--- Exception: 700 at sysrq_handle_dbg+0x3c/0xc8
 LR = __handle_sysrq+0x154/0x1cc
[effe5ed0] [c033df5c] __handle_sysrq+0x140/0x1cc (unreliable)
[effe5f00] [c0353ef8] serial8250_rx_chars+0xe8/0x218
[effe5f30] [c0359644] fsl8250_handle_irq+0xac/0x174
[effe5f50] [c0352f9c] serial8250_interrupt+0x40/0xe8
[effe5f70] [c00b5500] handle_irq_event_percpu+0xcc/0x2a8
[effe5fc0] [c00b5720] handle_irq_event+0x44/0x74
[effe5fe0] [c00b8e14] handle_fasteoi_irq+0xd0/0x17c
[effe5ff0] [c000d58c] call_handle_irq+0x18/0x28
[c4f91b10] [c0004f60] do_IRQ+0x150/0x224
[c4f91b40] [c000f6ac] ret_from_except+0x0/0x18
--- Exception: 501 at rpcauth_lookup_credcache+0x138/0x2a4
 LR = rpcauth_lookup_credcache+0xb8/0x2a4
[c4f91c00] [24002424] 0x24002424 (unreliable)
[c4f91c50] [c055cb84] rpcauth_lookupcred+0x64/0xac
[c4f91c80] [c055ce2c] rpcauth_refreshcred+0x11c/0x124
[c4f91cc0] [c055ac80] __rpc_execute+0x8c/0x330
[c4f91d10] [c05540b8] rpc_run_task+0x9c/0xc4
[c4f91d20] [c0554204] rpc_call_sync+0x50/0xb8
[c4f91d50] [c0257164] nfs_proc_getattr+0x48/0x5c
[c4f91d70] [c024aaa4] __nfs_revalidate_inode+0xa8/0x168
[c4f91d90] [c024ac1c] nfs_revalidate_mapping+0xb8/0x194
[c4f91da0] [c0251f00] nfs_follow_link+0x24/0xc8
[c4f91dc0] [c0145280] path_lookupat+0x2f4/0x824
[c4f91e10] [c01457dc] filename_lookup.isra.33+0x2c/0x8c
[c4f91e30] [c0147a74] user_path_at_empty+0x58/0x9c
[c4f91eb0] [c013d5bc] vfs_fstatat+0x54/0xb4
[c4f91ee0] [c013d93c] SyS_stat64+0x1c/0x44
[c4f91f40] [c000eec0] ret_from_syscall+0x0/0x3c
--- Exception: c01 at 0xff08a98
 LR = 0xfed53e8
CPU: 1 PID: 2266 Comm: cron Not tainted 3.10.20-rt14+ #6
Call Trace:
[effe5bb0] [c0008060] show_stack+0x4c/0x168 (unreliable)
[effe5bf0] [c00b2cac] kgdb_handle_exception+0x1c4/0x1f8
[effe5c40] [c0010038] kgdb_handle_breakpoint+0x4c/0x80
[effe5c50] [c057e7e0] program_check_exception+0x10c/0x264
[effe5c60] [c000f660] ret_from_except_full+0x0/0x4c
--- Exception: 700 at kgdb_panic_event+0x1c/0x3c
 LR = notifier_call_chain+0x60/0xb0
[effe5d20] [](nil) (unreliable)
[effe5d40] [c05819dc] __atomic_notifier_call_chain+0x14/0x24
[effe5d50] [c05888a8] panic+0x114/

Re: KGDB panics on p2020 target

2014-01-20 Thread tiejun.chen

On 01/21/2014 02:04 PM, Arun Chandran wrote:

On Mon, Jan 20, 2014 at 1:55 PM, "�tiejun.chen�"
wrote:


On 01/17/2014 03:52 PM, Arun Chandran wrote:


Hi,

I am testing kgdb on freescale p2020 target.

In target


1)
root@freescale-p2020ds:~# uname -a
Linux freescale-p2020ds 3.10.20-rt14+ #9 SMP Thu Jan 16 16:32:15 IST 2014
ppc GNU/Linux

2)
root@freescale-p2020ds:~# cat /proc/cpuinfo
processor   : 0
cpu : e500v2
clock   : 999.990008MHz
revision: 4.0 (pvr 8021 1040)
bogomips: 124.99

processor   : 1
cpu : e500v2
clock   : 999.990008MHz
revision: 4.0 (pvr 8021 1040)
bogomips: 124.99

total bogomips  : 249.99
timebase: 62499376
platform: P2020 DS
model   : fsl,P2020DS
Memory  : 768 MB

3)
freescale-p2020ds:~# echo "ttyS1,115200" >
/sys/module/kgdboc/parameters/kgdoc

4) I set up host (settings given below); Then I send   "SysRq : DEBUG"

In host
--
(gdb) target remote /dev/ttyS0
Remote debugging using /dev/ttyS0
kgdb_breakpoint () at kernel/debug/debug_core.c:1013
1013 arch_kgdb_breakpoint();
(gdb) b sys_sync
Breakpoint 1 at 0xc0167288: file fs/sync.c, line 103.
(gdb) c
Continuing.

I am able to take control in host; after that I am setting breakpoint at
"sys_sync"

In target

root@freescale-p2020ds:~# for i in 1 2 3 4 5 6 7 8 9


do
sync
done



In host
--
Breakpoint 1, sys_sync () at fs/sync.c:103
103 {
(gdb) c
Continuing.

Breakpoint is hit only one time instead of 9 times; after that target
hangs.



I recommend you try upstream to take a further look at this, instead of
that Freescale distribution. As I recall currently KGDB works well in 85xx
case in ML.



Many thanks for your suggestion.

I tested the same thing on linux-3.12.y (
https://git.kernel.org/cgit/linux/kernel/git/stable/linux-stable.git/log/?id=refs%2Ftags%2Fv3.12.8&h=linux-3.12.y
).



Please go powerpc tree,

git clone git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git

git checkout next



root@p2020ds:~# uname -a
Linux freescale-p2020ds 3.12.8 #136 SMP Tue Jan 21 11:14:13 IST 2014 ppc
GNU/Linux


The breakpoint I placed at "sys_sync" is hit only once. After that my gdb
command "continue" does not make further
hits. And the target is hung. It is not even responding to my further
sending of "SysRq : DEBUG"

I have attached my .config







Then i tried to send "SysRq : DEBUG" in target kernel panics.

I have pasted the panic below.

#
SysRq : DEBUG
Kernel panic - not syncing: Recursive entry to debugger



The kernel already trap into kgdb_handle_exception() with the debug
exception while triggering that break point, but again you trigger another
debug exception by SysRq. Actually KGDB can't handle such this recursive
behavior, so KGDB always call kgdb_reenter_check() to prevent this scenario
with this call trace.



I am doing it because the target is hung after the initial breakpoint is
hit.


This is just what I mean. Although looks your kernel is hung, actually your 
kernel is already in kgdb exception handler previously. So after you send break 
by SysRq to debug, its not allowed by current kgdb scheme as I said. The 
following call trace also show this path explicitly.


Tiejun





static int kgdb_reenter_check(struct kgdb_state *ks)
{
 unsigned long addr;

 if (atomic_read(&kgdb_active) != raw_smp_processor_id())
 return 0;
 ...

 if (exception_level > 1) {
 dump_stack();
 panic("Recursive entry to debugger");
 }


Tiejun

  CPU: 1 PID: 2266 Comm: cron Not tainted 3.10.20-rt14+ #6

Call Trace:
[effe5d10] [c0008060] show_stack+0x4c/0x168 (unreliable)
[effe5d50] [c0588878] panic+0xe4/0x224
[effe5da0] [c00b2cbc] kgdb_handle_exception+0x1d4/0x1f8
[effe5df0] [c0010038] kgdb_handle_breakpoint+0x4c/0x80
[effe5e00] [c057e7e0] program_check_exception+0x10c/0x264
[effe5e10] [c000f660] ret_from_except_full+0x0/0x4c
--- Exception: 700 at sysrq_handle_dbg+0x3c/0xc8
  LR = __handle_sysrq+0x154/0x1cc
[effe5ed0] [c033df5c] __handle_sysrq+0x140/0x1cc (unreliable)
[effe5f00] [c0353ef8] serial8250_rx_chars+0xe8/0x218
[effe5f30] [c0359644] fsl8250_handle_irq+0xac/0x174
[effe5f50] [c0352f9c] serial8250_interrupt+0x40/0xe8
[effe5f70] [c00b5500] handle_irq_event_percpu+0xcc/0x2a8
[effe5fc0] [c00b5720] handle_irq_event+0x44/0x74
[effe5fe0] [c00b8e14] handle_fasteoi_irq+0xd0/0x17c
[effe5ff0] [c000d58c] call_handle_irq+0x18/0x28
[c4f91b10] [c0004f60] do_IRQ+0x150/0x224
[c4f91b40] [c000f6ac] ret_from_except+0x0/0x18
--- Exception: 501 at rpcauth_lookup_credcache+0x138/0x2a4
  LR = rpcauth_lookup_credcache+0xb8/0x2a4
[c4f91c00] [24002424] 0x24002424 (unreliable)
[c4f91c50] [c055cb84] rpcauth_lookupcred+0x64/0xac
[c4f91c80] [c055ce2c] rpcauth_refres

Re: [v3 PATCH 1/1] booke/kprobe: make program exception to use one dedicated exception stack

2011-08-31 Thread tiejun.chen
Benjamin Herrenschmidt wrote:
>>> As I understand it, the problem comes from the fact that stwu combines the
>>> creation of a stack frame with storing into that stack frame.  If they were
>> Yes.
>>
>>> separate instructions you'd have a new exception frame at a lower address
>>> by the time you actually store to the non-exception frame.
>> So when kprobe we should use a unique stack frame to skip that stack frame 
>> the
>> kprobed stwu want to create.
> 
> I still don't like that patch. Potentially the problem exist for all
> variants of powerpc, not just booke, and I'm not sure I like adding yet

Yes.

> another exception stack.

But I think we should extend easily this for other powerpc variants. And only
when enable CONFIG_KPROBES that dedicated exception stack is valid, so its not
such a big risk :)

> 
> Another (non-great) approach would be to special case stwu to the stack,
> and instead of doing the store while emulating the instruction, keep the
> store address around and do it later, after the stack has been unwound,
> in the exit path (a TIF flag to hit the slow path and then do it in the
> slow path).

Actually I also considered one idea that we do stw-update in the exit path like
your proposal. But I'm not sure if its worth intruding a new TIF flag only for
'stwu'. And if I understand what your exit path means properly, we should do
this on ret_from_except_full,

...
exc_exit_restart:
lwz r11,_NIP(r1)
lwz r12,_MSR(r1)

Looks we have to add something to update as 'stwu' since _NIP/_MSR are 
also
corrupted potentially. So I feel we'll make this complicated if we really do 
here.

exc_exit_start:
mtspr   SPRN_SRR0,r11
mtspr   SPRN_SRR1,r12
REST_2GPRS(11, r1)
lwz r1,GPR1(r1)
.globl exc_exit_restart_end
exc_exit_restart_end:
PPC405_ERR77_SYNC
rfi
b   .   /* prevent prefetch past rfi */

If I'm wrong please correct me.

> 
> It sounds hackish but it makes it easier to fix everybody at once, there
> are "issues" with changing stacks especially on ppc64 and it would
> definitely be affected as well if the stack frame created is larger than
> our gap.

If we provide another exception stack like we did debug exception on ppc64, are
there those "issues" you said?

Thanks
Tiejun

> 
> Cheers,
> Ben.
> 
> 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [BUG?]3.0-rc4+ftrace+kprobe: set kprobe at instruction 'stwu' lead to system crash/freeze

2011-11-30 Thread tiejun.chen
Benjamin Herrenschmidt wrote:
> On Fri, 2011-07-01 at 18:03 +0800, tiejun.chen wrote:
>> Here emulate_step() is called to emulate 'stwu'. Actually this is equivalent 
>> to
>> 1> update pr_regs->gpr[1] = mem(old r1 + (-A))
>> 2> 'stw , mem<(old r1 + (-A)) >
>>
>> You should notice the stack based on new r1 would be covered with mem> +(-A)>. So after this, the kernel exit from post_krpobe, something would be
>> broken. This should depend on sizeof(-A).
>>
>> For kprobe show_interrupts, you can see pregs->nip is re-written violently so
>> kernel issued.
>>
>> But sometimes we may only re-write some violate registers the kernel still
>> alive. And so this is just why the kernel works well for some kprobed point
>> after you change some kernel options/toolchains.
>>
>> If I'm correct its difficult to kprobe these stwu sp operation since the
>> sizeof(-A) is undermined for the kernel. So we have to implement in-depend
>> interrupt stack like PPC64.
> 
> So I've spent a lot of time trying to find a better way to fix that bug
> and I think I might have finally found one :-)

I can understand what you mean in below since I remember you already 
clarified
this way previously.

> 
>  - When you try to emulate stwcx on the kernel stack (and only there),

I think it should be stwu/stdu.

> don't actually perform the store. Set a TIF flag instead to indicate
> special processing in the exception return path and store the address to
> update somewhere either in a free slot of the stack frame itself of
> somewhere in the thread struct (the former would be easier). You may as
> well do some sanity checking on the value while at it to catch errors
> early.
> 
>  - In the exception return code, we already test for various TIF flags
> (*** see comment below, it's even buggy today for preempt ***), so we
> add a test for that flag and go to do_work.
> 
>  - At the end of do_work, we check for this TIF flag. If it's not set or
> any other flag is set, move on as usual. However, if it's the only flag
> still set:
> 
>  - Copy the exception frame we're about to unwind to just -below- the
> new r1 value we want to write to. Then perform the write, and change
> r1 to point to that copy of the frame.
> 
>  - Branch to restore: which will unwind everything from that copy of
> the frame, and eventually set r1 to GPR(1) in the copy which contains
> the new value of r1.

We still can't restore this there. As you know this emulated store instruction
can touch any filed inside pt_regs. Sometimes nip would be involved for this
problematic location. And unfortunately, this is just that we meet currently. So
we have to go exc_exit_restart.

.globl exc_exit_restart
exc_exit_restart:
lwz r11,_NIP(r1)
lwz r12,_MSR(r1)

I mean we have to do that real restore here. So I'm really not sure if its a
good way to add such a codes including check TIF/copy-get new r1/restore
operation here since this is so deep for the exception return code.

exc_exit_start:
mtspr   SPRN_SRR0,r11
mtspr   SPRN_SRR1,r12

> 
> This is the less intrusive approach and should work just fine, it's also
> more robust than anything I've been able to think of and the approach
> would work for 32 and 64-bit similarily.
> 
> (***) Above comment about a bug: If you look at entry_64.S version of
> ret_from_except_lite you'll notice that in the !preempt case, after
> we've checked MSR_PR we test for any TIF flag in _TIF_USER_WORK_MASK to
> decide whether to go to do_work or not. However, in the preempt case, we
> do a convoluted trick to test SIGPENDING only if PR was set and always
> test NEED_RESCHED ... but we forget to test any other bit of
> _TIF_USER_WORK_MASK !!! So that means that with preempt, we completely
> fail to test for things like single step, syscall tracing, etc...
> 

This is another problem we should address.

> I think this should be fixed at the same time, by simplifying the code
> by doing:
> 
>  - Test PR. If set, go to test_work_user, else continue (or the other
> way around and call it test_work_kernel)
> 
>  - In test_work_user, always test for _TIF_USER_WORK_MASK to decide to
> go to do_work, maybe call it do_user_work
> 
>  - In test_work_kernel, test for _TIF_KERNEL_WORK_MASK which is set to
> our new flag along with NEED_RESCHED if preempt is enabled and branch to
> do_kernel_work.
> 
> do_user_work is basically the same as today's user_work
> 
> do_kernel_work is basically the same as today preempt block with added
> code to handle the new flag as described above.
> 
> Is anybody volunteering for fixing tha

Re: [BUG?]3.0-rc4+ftrace+kprobe: set kprobe at instruction 'stwu' lead to system crash/freeze

2011-12-01 Thread tiejun.chen
Benjamin Herrenschmidt wrote:
> On Wed, 2011-11-30 at 19:06 +0800, tiejun.chen wrote:
> 
>>>  - Copy the exception frame we're about to unwind to just -below- the
>>> new r1 value we want to write to. Then perform the write, and change
>>> r1 to point to that copy of the frame.
>>>
>>>  - Branch to restore: which will unwind everything from that copy of
>>> the frame, and eventually set r1 to GPR(1) in the copy which contains
>>> the new value of r1.
>> We still can't restore this there.
> 
> Yes, we can since we have copied the pt_regs down to -below- where we
> are going to write to. That's the whole trick. We copy the pt_regs
> somewhere "safe" and restore from there.
> 
>> I mean we have to do that real restore here. So I'm really not sure if its a
>> good way to add such a codes including check TIF/copy-get new r1/restore
>> operation here since this is so deep for the exception return code.
> 
> No. Re-read my explanation.
> 
> In the do_work case (so when things are still easy), we copy the pt_regs
> of the return frame to a safe place (right below where we want to write
> to typically), and change r1 to point to this "new" frame which we use
> to restore from. Then we do the store which is now safe and go to an
> unmodified "restore" exit path.

Do you mean we should push the original pt_regs (or that whole exception stack)
downwards the location the new r1 point? Then its safe to perform this real
emulated stw instruction. At last we will reroute r1 to that copied exception
frame to restore as normal. Right?

Here I suppose so, I implement this for PPC32 based on the above understanding.
I take a validation for kprobing do_fork()/show_interrupts(), now looks fine.
Tomorrow I will go PPC64, and hope its fine as well.

If everything is good I'll send these patches to linuxppc-dev next week.

Cheers
Tiejun

> 
>>> This is the less intrusive approach and should work just fine, it's also
>>> more robust than anything I've been able to think of and the approach
>>> would work for 32 and 64-bit similarily.
>>>
>>> (***) Above comment about a bug: If you look at entry_64.S version of
>>> ret_from_except_lite you'll notice that in the !preempt case, after
>>> we've checked MSR_PR we test for any TIF flag in _TIF_USER_WORK_MASK to
>>> decide whether to go to do_work or not. However, in the preempt case, we
>>> do a convoluted trick to test SIGPENDING only if PR was set and always
>>> test NEED_RESCHED ... but we forget to test any other bit of
>>> _TIF_USER_WORK_MASK !!! So that means that with preempt, we completely
>>> fail to test for things like single step, syscall tracing, etc...
>>>
>> This is another problem we should address.
>>
>>> I think this should be fixed at the same time, by simplifying the code
>>> by doing:
>>>
>>>  - Test PR. If set, go to test_work_user, else continue (or the other
>>> way around and call it test_work_kernel)
>>>
>>>  - In test_work_user, always test for _TIF_USER_WORK_MASK to decide to
>>> go to do_work, maybe call it do_user_work
>>>
>>>  - In test_work_kernel, test for _TIF_KERNEL_WORK_MASK which is set to
>>> our new flag along with NEED_RESCHED if preempt is enabled and branch to
>>> do_kernel_work.
>>>
>>> do_user_work is basically the same as today's user_work
>>>
>>> do_kernel_work is basically the same as today preempt block with added
>>> code to handle the new flag as described above.
>>>
>>> Is anybody volunteering for fixing that ? I don't have the bandwidth
>> I always use one specific kprobe stack to fix this for BOOKE and work well 
>> in my
>> local tree :) Do you remember my v3 patch? I think its possible to extend 
>> this
>> for all PPC variants.
>>
>> Anyway, I'd like to be this volunteer with our last solution.
> 
> So the second problem I exposed, for which you just volunteered
> (hint :-) is an orthogonal issue not related to kprobe or stacks which
> happen to be something I discovered yesterday while looking at the code.
> 
> As for the solution to the emulation problem, re-read my explanation.
> The whole trick is that we can avoid a separate stack (which I really
> want to avoid) and we can avoid touching the low level restore code
> path.
> 
> Cheers,
> Ben.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 3/4] ppc32/kprobe: complete kprobe and migrate exception frame

2011-12-12 Thread tiejun.chen
Benjamin Herrenschmidt wrote:
> On Mon, 2011-12-12 at 16:50 +0800, Tiejun Chen wrote:
>> We can't emulate stwu since that may corrupt current exception stack.
>> So we will have to do real store operation in the exception return code.
>>
>> Firstly we'll allocate a trampoline exception frame below the kprobed
>> function stack and copy the current exception frame to the trampoline.
>> Then we can do this real store operation to implement 'stwu', and reroute
>> the trampoline frame to r1 to complete this exception migration.
>>
>> Signed-off-by: Tiejun Chen 
>> ---
>>  arch/powerpc/kernel/entry_32.S |   26 ++
>>  1 files changed, 26 insertions(+), 0 deletions(-)
>>
>> diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
>> index 56212bc..d56e311 100644
>> --- a/arch/powerpc/kernel/entry_32.S
>> +++ b/arch/powerpc/kernel/entry_32.S
>> @@ -1185,6 +1185,8 @@ recheck:
>>  bne-do_resched
>>  andi.   r0,r9,_TIF_USER_WORK_MASK
>>  beq restore_user
>> +andis.  r0,r9,_TIF_DELAYED_KPROBE@h
>> +bne-restore_kprobe
> 
> Same comment as earlier about name. Note that you're not hooking in the
> right place. "recheck" is only reached if you -already- went out of the
> normal exit path and only when going back to user space unless I'm
> missing something (which is really the case you don't care about).
> 
> You need to hook into "resume_kernel" instead.

Maybe I'm misunderstanding what you mean since as I recall you suggestion we
should do this at the end of do_work.

> 
> Also, we may want to simplify the whole thing, instead of checking user
> vs. kernel first etc... we could instead have a single _TIF_WORK_MASK
> which includes both the bits for user work and the new bit for kernel
> work. With preempt, the kernel work bits would also include
> _TIF_NEED_RESCHED.
> 
> Then you have in the common exit path, a single test for that, with a
> fast path that skips everything and just goes to "restore" for both
> kernel and user.
> 
> The only possible issue is the setting of dbcr0 for BookE and 44x and we
> can keep that as a special case keyed of MSR_PR in the resume path under
> ifdef BOOKE (we'll probably sanitize that later with some different
> rework anyway). 
> 
> So the exit path because something like:
> 
> ret_from_except:
>   .. hard disable interrupts (unchanged) ...
>   read TIF flags
>   andi with _TIF_WORK_MASK
>   nothing set -> restore
>   check PR
>   set -> do_work_user
>   no set -> do_work_kernel (kprobes & preempt)
>   (both loop until relevant _TIF flags are all clear)
> restore:
>   #ifdef BOOKE & 44x test PR & do dbcr0 stuff if needed
>   ... nornal restore ...

Do you mean we should reorganize current ret_from_except for ppc32 as well?

> 
>>  do_user_signal: /* r10 contains MSR_KERNEL here */
>>  ori r10,r10,MSR_EE
>>  SYNC
>> @@ -1202,6 +1204,30 @@ do_user_signal:   /* r10 contains 
>> MSR_KERNEL here */
>>  REST_NVGPRS(r1)
>>  b   recheck
>>  
>> +restore_kprobe:
>> +lwz r3,GPR1(r1)
>> +subir3,r3,INT_FRAME_SIZE; /* Allocate a trampoline exception frame 
>> */
>> +mr  r4,r1
>> +bl  copy_exc_stack  /* Copy from the original to the trampoline */
>> +
>> +/* Do real stw operation to complete stwu */
>> +mr  r4,r1
>> +addir4,r4,INT_FRAME_SIZE/* Get kprobed entry */
>> +lwz r5,GPR1(r1) /* Backup r1 */
>> +stw r4,GPR1(r1) /* Now store that safely */
> 
> The above confuses me. Shouldn't you do instead something like
> 
>   lwz r4,GPR1(r1)
>   subir3,r4,INT_FRAME_SIZE
>   li  r5,INT_FRAME_SIZE
>   bl  memcpy
> 

Anyway I'll try this if you think memcpy is fine/safe in exception return codes.

> To start with, then you need to know the "old" r1 value which may or may
> not be related to your current r1. The emulation code should stash it

If the old r1 is not related to our current r1, it shouldn't be possible to go
restore_kprob since we set that new flag only for the current.

If I'm wrong please correct me :)

Thanks
Tiejun

> into the int frame in an unused slot such as "orig_gpr3" (since that
> only pertains to restarting syscalls which we aren't doing here).
> 
> Then you adjust your r1 and do something like
> 
>   lwz r3,GPR1(r1)
>   lwz r0,ORIG_GPR3(r1)
>   stw r0,0(r3)
> 
> To perform the store, before doing the rest:
>  
>> +/* Reroute the trampoline frame to r1 */
>> +subir5,r5,INT_FRAME_SIZE
>> +mr  r1,r5
>> +
>> +/* Clear _TIF_DELAYED_KPROBE flag */
>> +rlwinm  r9,r1,0,0,(31-THREAD_SHIFT)
>> +lwz r0,TI_FLAGS(r9)
>> +rlwinm  r0,r0,0,_TIF_DELAYED_KPROBE
>> +stw r0,TI_FLAGS(r9)
>> +
>> +b   restore
>> +
>>  /*
>>   * We come here when we are at the end of handling an exception
>> 

Re: [PATCH 1/4] powerpc/kprobe: introduce a new thread flag

2011-12-12 Thread tiejun.chen
Benjamin Herrenschmidt wrote:
> On Mon, 2011-12-12 at 16:50 +0800, Tiejun Chen wrote:
>> We need to add a new thread flag, TIF_KPROBE/_TIF_DELAYED_KPROBE,
>> for handling kprobe operation while exiting exception.
> 
> The basic idea is sane, however the instruction emulation isn't per-se
> kprobe specific. It could be used by xmon too for example. I'd rather
> use a different name, something like TIF_EMULATE_STACK_STORE or

Its good term so I'll use this directly :)

Thanks
Tiejun

> something like that.
> 
> Cheers,
> Ben.
> 
>> Signed-off-by: Tiejun Chen 
>> ---
>>  arch/powerpc/include/asm/thread_info.h |2 ++
>>  1 files changed, 2 insertions(+), 0 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/thread_info.h 
>> b/arch/powerpc/include/asm/thread_info.h
>> index 836f231..3378734 100644
>> --- a/arch/powerpc/include/asm/thread_info.h
>> +++ b/arch/powerpc/include/asm/thread_info.h
>> @@ -112,6 +112,7 @@ static inline struct thread_info 
>> *current_thread_info(void)
>>  #define TIF_FREEZE  14  /* Freezing for suspend */
>>  #define TIF_SYSCALL_TRACEPOINT  15  /* syscall tracepoint 
>> instrumentation */
>>  #define TIF_RUNLATCH16  /* Is the runlatch enabled? */
>> +#define TIF_KPROBE  17  /* Is the delayed kprobe operation? */
>>  
>>  /* as above, but as bit values */
>>  #define _TIF_SYSCALL_TRACE  (1<> @@ -130,6 +131,7 @@ static inline struct thread_info 
>> *current_thread_info(void)
>>  #define _TIF_FREEZE (1<>  #define _TIF_SYSCALL_TRACEPOINT (1<>  #define _TIF_RUNLATCH   (1<> +#define _TIF_DELAYED_KPROBE (1<>  #define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
>>   _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT)
>>  
> 
> 
> 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 2/4] ppc32/kprobe: introduce copy_exc_stack

2011-12-12 Thread tiejun.chen
Benjamin Herrenschmidt wrote:
> On Mon, 2011-12-12 at 16:50 +0800, Tiejun Chen wrote:
>> We need a copy mechanism to migrate exception stack. But looks copy_page()
>> already implement this well so we can complete copy_exc_stack() based on
>> that directly.
> 
> I'd rather you don't hijack copy_page which is quite sensitive. The
> emulation isn't performance critical so a "dumber" routine would work

Yes, I just think we should introduce good performance so I 'steal' the original
copy_page().

> fine.
> 
> Why not use memcpy ? You can call it from assembly.

I'd like to switch to memcpy.

Thanks
Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 1/1] ppc64: fix missing to check all bits of _TIF_USER_WORK_MASK in preempt

2011-12-12 Thread tiejun.chen
Tiejun Chen wrote:
> In entry_64.S version of ret_from_except_lite, you'll notice that
> in the !preempt case, after we've checked MSR_PR we test for any
> TIF flag in _TIF_USER_WORK_MASK to decide whether to go to do_work
> or not. However, in the preempt case, we do a convoluted trick to
> test SIGPENDING only if PR was set and always test NEED_RESCHED ...
> but we forget to test any other bit of _TIF_USER_WORK_MASK !!! So
> that means that with preempt, we completely fail to test for things
> like single step, syscall tracing, etc...
> 
> This should be fixed as the following path:
> 
>  - Test PR. If set, go to test_work_user, else continue.
> 
>  - In test_work_user, always test for _TIF_USER_WORK_MASK to decide to
> go to do_work, maybe call it do_user_work
> 
>  - In test_work_kernel, test for _TIF_KERNEL_WORK_MASK which is set to
> our new flag along with NEED_RESCHED if preempt is enabled and branch to
> do_kernel_work.

Ben,

Any comment for this?

Tiejun

> 
> Signed-off-by: Benjamin Herrenschmidt 
> Signed-off-by: Tiejun Chen 
> ---
>  arch/powerpc/kernel/entry_64.S |   33 +++--
>  1 files changed, 15 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index d834425..9e70b9a 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -571,27 +571,26 @@ _GLOBAL(ret_from_except_lite)
>   mtmsrd  r9,1/* Update machine state */
>  #endif /* CONFIG_PPC_BOOK3E */
>  
> -#ifdef CONFIG_PREEMPT
> - clrrdi  r9,r1,THREAD_SHIFT  /* current_thread_info() */
> - li  r0,_TIF_NEED_RESCHED/* bits to check */
> - ld  r3,_MSR(r1)
> - ld  r4,TI_FLAGS(r9)
> - /* Move MSR_PR bit in r3 to _TIF_SIGPENDING position in r0 */
> - rlwimi  r0,r3,32+TIF_SIGPENDING-MSR_PR_LG,_TIF_SIGPENDING
> - and.r0,r4,r0/* check NEED_RESCHED and maybe SIGPENDING */
> - bne do_work
> -
> -#else /* !CONFIG_PREEMPT */
>   ld  r3,_MSR(r1) /* Returning to user mode? */
>   andi.   r3,r3,MSR_PR
> - beq restore /* if not, just restore regs and return */
> + bne test_work_user
>  
> + clrrdi  r9,r1,THREAD_SHIFT  /* current_thread_info() */
> + li  r0,_TIF_USER_WORK_MASK
> +#ifdef CONFIG_PREEMPT
> + ori r0,r0,_TIF_NEED_RESCHED
> +#endif
> + ld  r4,TI_FLAGS(r9)
> + and.r0,r4,r0/* check NEED_RESCHED and maybe 
> _TIF_USER_WORK_MASK */
> + bne do_kernel_work
> + b   restore /* if so, just restore regs and return */
> +
> +test_work_user:
>   /* Check current_thread_info()->flags */
>   clrrdi  r9,r1,THREAD_SHIFT
>   ld  r4,TI_FLAGS(r9)
>   andi.   r0,r4,_TIF_USER_WORK_MASK
> - bne do_work
> -#endif
> + bne do_user_work
>  
>  restore:
>  BEGIN_FW_FTR_SECTION
> @@ -693,10 +692,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
>   b   .ret_from_except_lite   /* loop back and handle more */
>  #endif
>  
> -do_work:
> +do_kernel_work:
>  #ifdef CONFIG_PREEMPT
> - andi.   r0,r3,MSR_PR/* Returning to user mode? */
> - bne user_work
>   /* Check that preempt_count() == 0 and interrupts are enabled */
>   lwz r8,TI_PREEMPT(r9)
>   cmpwi   cr1,r8,0
> @@ -738,9 +735,9 @@ do_work:
>   bne 1b
>   b   restore
>  
> -user_work:
>  #endif /* CONFIG_PREEMPT */
>  
> +do_user_work:
>   /* Enable interrupts */
>  #ifdef CONFIG_PPC_BOOK3E
>   wrteei  1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 1/1] ppc64: fix missing to check all bits of _TIF_USER_WORK_MASK in preempt

2011-12-12 Thread tiejun.chen
Benjamin Herrenschmidt wrote:
> On Tue, 2011-12-13 at 13:01 +0800, tiejun.chen wrote:
>> Tiejun Chen wrote:
>>> In entry_64.S version of ret_from_except_lite, you'll notice that
>>> in the !preempt case, after we've checked MSR_PR we test for any
>>> TIF flag in _TIF_USER_WORK_MASK to decide whether to go to do_work
>>> or not. However, in the preempt case, we do a convoluted trick to
>>> test SIGPENDING only if PR was set and always test NEED_RESCHED ...
>>> but we forget to test any other bit of _TIF_USER_WORK_MASK !!! So
>>> that means that with preempt, we completely fail to test for things
>>> like single step, syscall tracing, etc...
>>>
>>> This should be fixed as the following path:
>>>
>>>  - Test PR. If set, go to test_work_user, else continue.
>>>
>>>  - In test_work_user, always test for _TIF_USER_WORK_MASK to decide to
>>> go to do_work, maybe call it do_user_work
>>>
>>>  - In test_work_kernel, test for _TIF_KERNEL_WORK_MASK which is set to
>>> our new flag along with NEED_RESCHED if preempt is enabled and branch to
>>> do_kernel_work.
>> Ben,
>>
>> Any comment for this?
> 
> Sorry, I didn't get to review that one yet (nor reply to your newer

I'm nothing, please do this when you're fine completely.

Thanks
Tiejun

> responses), I have very sore eyes and basically had to get off the
> computer. Hopefully I'll be better tomorrow.
> 
> Cheers,
> Ben.
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 3/4] ppc32/kprobe: complete kprobe and migrate exception frame

2011-12-13 Thread tiejun.chen
>>
>> You need to hook into "resume_kernel" instead.
> 
> Maybe I'm misunderstanding what you mean since as I recall you suggestion we
> should do this at the end of do_work.
>

I regenerate this with hooking into resume_kernel in below.

>> Also, we may want to simplify the whole thing, instead of checking user
>> vs. kernel first etc... we could instead have a single _TIF_WORK_MASK
>> which includes both the bits for user work and the new bit for kernel
>> work. With preempt, the kernel work bits would also include
>> _TIF_NEED_RESCHED.
>>
>> Then you have in the common exit path, a single test for that, with a
>> fast path that skips everything and just goes to "restore" for both
>> kernel and user.
>>
>> The only possible issue is the setting of dbcr0 for BookE and 44x and we
>> can keep that as a special case keyed of MSR_PR in the resume path under
>> ifdef BOOKE (we'll probably sanitize that later with some different
>> rework anyway). 
>>
>> So the exit path because something like:
>>
>> ret_from_except:
>>  .. hard disable interrupts (unchanged) ...
>>  read TIF flags
>>  andi with _TIF_WORK_MASK
>>  nothing set -> restore
>>  check PR
>>  set -> do_work_user
>>  no set -> do_work_kernel (kprobes & preempt)
>>  (both loop until relevant _TIF flags are all clear)
>> restore:
>>  #ifdef BOOKE & 44x test PR & do dbcr0 stuff if needed
>>  ... nornal restore ...
> 
> Do you mean we should reorganize current ret_from_except for ppc32 as well?

I assume it may not necessary to reorganize ret_from_except for *ppc32*.

> 
>>>  do_user_signal:/* r10 contains MSR_KERNEL here */
>>> ori r10,r10,MSR_EE
>>> SYNC
>>> @@ -1202,6 +1204,30 @@ do_user_signal:  /* r10 contains 
>>> MSR_KERNEL here */
>>> REST_NVGPRS(r1)
>>> b   recheck
>>>  
>>> +restore_kprobe:
>>> +   lwz r3,GPR1(r1)
>>> +   subir3,r3,INT_FRAME_SIZE; /* Allocate a trampoline exception frame 
>>> */
>>> +   mr  r4,r1
>>> +   bl  copy_exc_stack  /* Copy from the original to the trampoline */
>>> +
>>> +   /* Do real stw operation to complete stwu */
>>> +   mr  r4,r1
>>> +   addir4,r4,INT_FRAME_SIZE/* Get kprobed entry */
>>> +   lwz r5,GPR1(r1) /* Backup r1 */
>>> +   stw r4,GPR1(r1) /* Now store that safely */
>> The above confuses me. Shouldn't you do instead something like
>>
>>  lwz r4,GPR1(r1)

Now GPR1(r1) is already pointed with new r1 in emulate_step().

>>  subir3,r4,INT_FRAME_SIZE

Here we need this, 'mr r4,r1', since r1 holds current exception stack.

>>  li  r5,INT_FRAME_SIZE
>>  bl  memcpy

Then the current exception stack is migrated below the kprobed function stack.

stack flow:

--  -> old r1 when hit 'stwu r1, -AA(r1)' in our
^   ^   kprobed function entry.
|   |
|   |> AA allocated for the kprobed function
|   |
|   v
|-  -> new r1, also GPR1(r1). It holds the kprobed
   ^|   function stack , -AA(r1).
   ||
   ||> INT_FRAME_SIZE for program exception
   ||
   |v
---|-  -> r1 is updated to hold program exception stack.
   |
   |
   |> migrate the exception stack (r1) below the
   |kprobed after memcpy with INT_FRAME_SIZE.
   v
-  -> reroute this as r1 for program exception stack.

>>
> 
> Anyway I'll try this if you think memcpy is fine/safe in exception return 
> codes.
> 
>> To start with, then you need to know the "old" r1 value which may or may
>> not be related to your current r1. The emulation code should stash it
> 
> If the old r1 is not related to our current r1, it shouldn't be possible to go
> restore_kprob since we set that new flag only for the current.

If you agree what I say above, please check the follow:
==
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 56212bc..b6554c1 100644
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 56212bc..b6554c1 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -813,12 +813,40 @@ restore_user:

 #ifdef CONFIG_PREEMPT
b   restore
+#endif

-/* N.B. the only way to get here is from the beq following ret_from_except. */
 resume_kernel:
/* check current_thread_info->preempt_count */
rlwinm  r9,r1,0,0,(31-THREAD_SHIFT)
lwz r0,TI_PREEMPT(r9)
+   andis.  r0,r0,_TIF_EMULATE_STACK_STORE@h
+   beq+restore
+
+   lwz r3,GPR1(r1)
+   subir3,r3,INT_FRAME_SIZE/* Allocate a trampoline exception 
frame */
+   mr  r4,r1
+   li  r5,INT_FRAME_SIZE
+   bl  memcpy  /* Copy from the 

Re: [PATCH 3/4] ppc32/kprobe: complete kprobe and migrate exception frame

2011-12-13 Thread tiejun.chen
Sorry please ignore this email since I'm missing something here :(

Tiejun

tiejun.chen wrote:
>>> You need to hook into "resume_kernel" instead.
>> Maybe I'm misunderstanding what you mean since as I recall you suggestion we
>> should do this at the end of do_work.
>>
> 
> I regenerate this with hooking into resume_kernel in below.
> 
>>> Also, we may want to simplify the whole thing, instead of checking user
>>> vs. kernel first etc... we could instead have a single _TIF_WORK_MASK
>>> which includes both the bits for user work and the new bit for kernel
>>> work. With preempt, the kernel work bits would also include
>>> _TIF_NEED_RESCHED.
>>>
>>> Then you have in the common exit path, a single test for that, with a
>>> fast path that skips everything and just goes to "restore" for both
>>> kernel and user.
>>>
>>> The only possible issue is the setting of dbcr0 for BookE and 44x and we
>>> can keep that as a special case keyed of MSR_PR in the resume path under
>>> ifdef BOOKE (we'll probably sanitize that later with some different
>>> rework anyway). 
>>>
>>> So the exit path because something like:
>>>
>>> ret_from_except:
>>> .. hard disable interrupts (unchanged) ...
>>> read TIF flags
>>> andi with _TIF_WORK_MASK
>>> nothing set -> restore
>>> check PR
>>> set -> do_work_user
>>> no set -> do_work_kernel (kprobes & preempt)
>>> (both loop until relevant _TIF flags are all clear)
>>> restore:
>>> #ifdef BOOKE & 44x test PR & do dbcr0 stuff if needed
>>> ... nornal restore ...
>> Do you mean we should reorganize current ret_from_except for ppc32 as well?
> 
> I assume it may not necessary to reorganize ret_from_except for *ppc32*.
> 
>>>>  do_user_signal:   /* r10 contains MSR_KERNEL here */
>>>>ori r10,r10,MSR_EE
>>>>SYNC
>>>> @@ -1202,6 +1204,30 @@ do_user_signal: /* r10 contains 
>>>> MSR_KERNEL here */
>>>>REST_NVGPRS(r1)
>>>>b   recheck
>>>>  
>>>> +restore_kprobe:
>>>> +  lwz r3,GPR1(r1)
>>>> +  subir3,r3,INT_FRAME_SIZE; /* Allocate a trampoline exception frame 
>>>> */
>>>> +  mr  r4,r1
>>>> +  bl  copy_exc_stack  /* Copy from the original to the trampoline */
>>>> +
>>>> +  /* Do real stw operation to complete stwu */
>>>> +  mr  r4,r1
>>>> +  addir4,r4,INT_FRAME_SIZE/* Get kprobed entry */
>>>> +  lwz r5,GPR1(r1) /* Backup r1 */
>>>> +  stw r4,GPR1(r1) /* Now store that safely */
>>> The above confuses me. Shouldn't you do instead something like
>>>
>>> lwz r4,GPR1(r1)
> 
> Now GPR1(r1) is already pointed with new r1 in emulate_step().
> 
>>> subir3,r4,INT_FRAME_SIZE
> 
> Here we need this, 'mr r4,r1', since r1 holds current exception stack.
> 
>>> li  r5,INT_FRAME_SIZE
>>> bl  memcpy
> 
> Then the current exception stack is migrated below the kprobed function stack.
> 
> stack flow:
> 
> --  -> old r1 when hit 'stwu r1, -AA(r1)' in our
> ^   ^   kprobed function entry.
> |   |
> |   |> AA allocated for the kprobed function
> |   |
> |   v
> |-  -> new r1, also GPR1(r1). It holds the kprobed
>^|   function stack , -AA(r1).
>||
>||> INT_FRAME_SIZE for program exception
>||
>|v
> ---|-  -> r1 is updated to hold program exception stack.
>|
>|
>|> migrate the exception stack (r1) below the
>|kprobed after memcpy with INT_FRAME_SIZE.
>v
> -  -> reroute this as r1 for program exception stack.
> 
>> Anyway I'll try this if you think memcpy is fine/safe in exception return 
>> codes.
>>
>>> To start with, then you need to know the "old" r1 value which may or may
>>> not be related to your current r1. The emulation code should stash it
>> If the old r1 is not related to our current r1, it shouldn't be possible to 
>

Re: [PATCH 3/4] ppc32/kprobe: complete kprobe and migrate exception frame

2011-12-13 Thread tiejun.chen
>> You need to hook into "resume_kernel" instead.
>

I regenerate this with hooking into resume_kernel in below.

> Maybe I'm misunderstanding what you mean since as I recall you suggestion we
> should do this at the end of do_work.
> 
>> Also, we may want to simplify the whole thing, instead of checking user
>> vs. kernel first etc... we could instead have a single _TIF_WORK_MASK
>> which includes both the bits for user work and the new bit for kernel
>> work. With preempt, the kernel work bits would also include
>> _TIF_NEED_RESCHED.
>>
>> Then you have in the common exit path, a single test for that, with a
>> fast path that skips everything and just goes to "restore" for both
>> kernel and user.
>>
>> The only possible issue is the setting of dbcr0 for BookE and 44x and we
>> can keep that as a special case keyed of MSR_PR in the resume path under
>> ifdef BOOKE (we'll probably sanitize that later with some different
>> rework anyway). 
>>
>> So the exit path because something like:
>>
>> ret_from_except:
>>  .. hard disable interrupts (unchanged) ...
>>  read TIF flags
>>  andi with _TIF_WORK_MASK
>>  nothing set -> restore
>>  check PR
>>  set -> do_work_user
>>  no set -> do_work_kernel (kprobes & preempt)
>>  (both loop until relevant _TIF flags are all clear)
>> restore:
>>  #ifdef BOOKE & 44x test PR & do dbcr0 stuff if needed
>>  ... nornal restore ...
> 
> Do you mean we should reorganize current ret_from_except for ppc32 as well?
> 

I assume it may not necessary to reorganize ret_from_except for *ppc32* .

>>>  do_user_signal:/* r10 contains MSR_KERNEL here */
>>> ori r10,r10,MSR_EE
>>> SYNC
>>> @@ -1202,6 +1204,30 @@ do_user_signal:  /* r10 contains 
>>> MSR_KERNEL here */
>>> REST_NVGPRS(r1)
>>> b   recheck
>>>  
>>> +restore_kprobe:
>>> +   lwz r3,GPR1(r1)
>>> +   subir3,r3,INT_FRAME_SIZE; /* Allocate a trampoline exception frame 
>>> */
>>> +   mr  r4,r1
>>> +   bl  copy_exc_stack  /* Copy from the original to the trampoline */
>>> +
>>> +   /* Do real stw operation to complete stwu */
>>> +   mr  r4,r1
>>> +   addir4,r4,INT_FRAME_SIZE/* Get kprobed entry */
>>> +   lwz r5,GPR1(r1) /* Backup r1 */
>>> +   stw r4,GPR1(r1) /* Now store that safely */
>> The above confuses me. Shouldn't you do instead something like
>>
>>  lwz r4,GPR1(r1)

Now GPR1(r1) is already pointed with new r1 in emulate_step().

>>  subir3,r4,INT_FRAME_SIZE

Here we need this, 'mr r4,r1', since r1 holds current exception stack.

>>  li  r5,INT_FRAME_SIZE
>>  bl  memcpy

Then the current exception stack is migrated below the kprobed function stack.

stack flow:

--  -> old r1 when hit 'stwu r1, -AA(r1)' in our
^   ^   kprobed function entry.
|   |
|   |> AA allocated for the kprobed function
|   |
|   v
|-  -> new r1, also GPR1(r1). It holds the kprobed
   ^|   function stack , -AA(r1).
   ||
   ||> INT_FRAME_SIZE for program exception
   ||
   |v
---|-  -> r1 is updated to hold program exception stack.
   |
   |
   |> migrate the exception stack (r1) below the
   |kprobed after memcpy with INT_FRAME_SIZE.
   v
-  -> reroute this as r1 for program exception stack.

>>
> 
> Anyway I'll try this if you think memcpy is fine/safe in exception return 
> codes.
> 
>> To start with, then you need to know the "old" r1 value which may or may
>> not be related to your current r1. The emulation code should stash it
> 
> If the old r1 is not related to our current r1, it shouldn't be possible to go
> restore_kprob since we set that new flag only for the current.
> 
> If I'm wrong please correct me :)

If you agree what I say above, and its also avoid any issue introduced with
orig_gpr3, please check the follow:
=
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 56212bc..277029d 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -813,9 +813,40 @@ restore_user:

 #ifdef CONFIG_PREEMPT
b   restore
+#endif

-/* N.B. the only way to get here is from the beq following ret_from_except. */
 resume_kernel:
+#ifdef CONFIG_KPROBES
+   /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
+   rlwinm  r9,r1,0,0,(31-THREAD_SHIFT)
+   lwz r0,TI_FLAGS(r9)
+   andis.  r0,r0,_TIF_EMULATE_STACK_STORE@h
+   beq+restore_kernel
+
+   addir9,r1,INT_FRAME_SIZE/* Get the kprobed function entry */
+
+   lwz r3,GPR1(r1)
+   subir3,r3,INT_FRAME_SIZE/* dst: Allocate a trampoline exception
frame */
+   mr  r4,

Re: [PATCH 3/4] ppc32/kprobe: complete kprobe and migrate exception frame

2011-12-15 Thread tiejun.chen
Looks we have to go into 'restore' at last as I said previously. I send v2 based
on your all comments.

>> I assume it may not necessary to reorganize ret_from_except for *ppc32* .
> 
> It might be cleaner but I can do that myself later.
> 

I have this version but I'm not 100% sure if its as you expect :)

#define _TIF_WORK_MASK (_TIF_USER_WORK_MASK | _TIF_EMULATE_STACK_STORE)

==
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 56212bc..e52b586 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -791,41 +791,29 @@ ret_from_except:
SYNC/* Some chip revs have problems here... */
MTMSRD(r10) /* disable interrupts */

-   lwz r3,_MSR(r1) /* Returning to user mode? */
-   andi.   r0,r3,MSR_PR
-   beq resume_kernel
-
 user_exc_return:   /* r10 contains MSR_KERNEL here */
/* Check current_thread_info()->flags */
rlwinm  r9,r1,0,0,(31-THREAD_SHIFT)
lwz r9,TI_FLAGS(r9)
-   andi.   r0,r9,_TIF_USER_WORK_MASK
-   bne do_work
+   andi.   r0,r9,_TIF_WORK_MASK
+   beq restore

-restore_user:
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
-   /* Check whether this process has its own DBCR0 value.  The internal
-  debug mode bit tells us that dbcr0 should be loaded. */
-   lwz r0,THREAD+THREAD_DBCR0(r2)
-   andis.  r10,r0,DBCR0_IDM@h
-   bnel-   load_dbcr0
-#endif
+   lwz r3,_MSR(r1) /* Returning to user mode? */
+   andi.   r0,r3,MSR_PR
+   bne do_user_work

 #ifdef CONFIG_PREEMPT
-   b   restore
-
 /* N.B. the only way to get here is from the beq following ret_from_except. */
-resume_kernel:
/* check current_thread_info->preempt_count */
rlwinm  r9,r1,0,0,(31-THREAD_SHIFT)
lwz r0,TI_PREEMPT(r9)
cmpwi   0,r0,0  /* if non-zero, just restore regs and return */
-   bne restore
+   bne 2f
lwz r0,TI_FLAGS(r9)
andi.   r0,r0,_TIF_NEED_RESCHED
-   beq+restore
+   beq+2f
andi.   r0,r3,MSR_EE/* interrupts off? */
-   beq restore /* don't schedule if so */
+   beq 2f  /* don't schedule if so */
 #ifdef CONFIG_TRACE_IRQFLAGS
/* Lockdep thinks irqs are enabled, we need to call
 * preempt_schedule_irq with IRQs off, so we inform lockdep
@@ -844,12 +832,54 @@ resume_kernel:
 */
bl  trace_hardirqs_on
 #endif
-#else
-resume_kernel:
+2:
 #endif /* CONFIG_PREEMPT */

+   /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
+   rlwinm  r9,r1,0,0,(31-THREAD_SHIFT)
+   lwz r0,TI_FLAGS(r9)
+   andis.  r0,r0,_TIF_EMULATE_STACK_STORE@h
+   beq+restore
+
+   addir9,r1,INT_FRAME_SIZE/* Get the kprobed function entry */
+
+   lwz r3,GPR1(r1)
+   subir3,r3,INT_FRAME_SIZE/* dst: Allocate a trampoline exception
frame */
+   mr  r4,r1   /* src:  current exception frame */
+   li  r5,INT_FRAME_SIZE   /* size: INT_FRAME_SIZE */
+   mr  r1,r3   /* Reroute the trampoline frame to r1 */
+   bl  memcpy  /* Copy from the original to the
trampoline */
+
+   /* Do real store operation to complete stwu */
+   lwz r5,GPR1(r1)
+   stw r9,0(r5)
+
+   /* Do real store operation to complete stwu */
+   lwz r5,GPR1(r1)
+   stw r9,0(r5)
+
+   /* Clear _TIF_EMULATE_STACK_STORE flag */
+   rlwinm  r9,r1,0,0,(31-THREAD_SHIFT)
+   lis r11,_TIF_EMULATE_STACK_STORE@h
+   addir9,r9,TI_FLAGS
+0: lwarx   r8,0,r9
+   andcr8,r8,r11
+#ifdef CONFIG_IBM405_ERR77
+   dcbt0,r9
+#endif
+   stwcx.  r8,0,r9
+   bne-0b
+
/* interrupts are hard-disabled at this point */
 restore:
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
+   lwz r3,_MSR(r1) /* Returning to user mode? */
+   andi.   r0,r3,MSR_PR
+   beq 1f
+   /* Check whether this process has its own DBCR0 value.  The internal
+  debug mode bit tells us that dbcr0 should be loaded. */
+   lwz r0,THREAD+THREAD_DBCR0(r2)
+   andis.  r10,r0,DBCR0_IDM@h
+   bnel-   load_dbcr0
+1:
+#endif
+
 #ifdef CONFIG_44x
 BEGIN_MMU_FTR_SECTION
b   1f
@@ -1159,7 +1189,7 @@ global_dbcr0:
.previous
 #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */

-do_work:   /* r10 contains MSR_KERNEL here */
+do_user_work:  /* r10 contains MSR_KERNEL here */
andi.   r0,r9,_TIF_NEED_RESCHED
beq do_user_signal

@@ -1184,7 +1214,7 @@ recheck:
andi.   r0,r9,_TIF_NEED_RESCHED
bne-do_resched
andi.   r0,r9,_TIF_USER_WORK_MASK
-   beq restore_user
+   beq restore
 do_user_signal:/* r10 contains MSR_KERNEL

Re: Linux port availability for p5010 processor

2011-12-15 Thread tiejun.chen
Vineeth wrote:
> found p5020_ds.c in platforms/85xx;
> why is it a part of 85xx directory ? the core of P5020 is E5500 where the
> core of 85xx is e500;
> 

e5500 is e500mc-64bit Power Architecture core.

> Do we have the processor initialization code (start.S, head.S) files, port
> available with linux ?
> 

So it should go head_64.S/entry_64.S with CONFIG_PPC_BOOK3E.

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: Kernel not booting when supplying boot parameter mem

2011-12-27 Thread tiejun.chen
Arshad, Farrukh wrote:
> Greetings All,
> 
> I have a basic question. I have 512 MB memory. I want my kernel to use only 
> last 128 MB of memory starting from address 0x1000. I have configured the 
> kernel CONFIG_PHYSICAL_START=0x1000 and in kernel boot parameter I have 
> set mem=128M. In this scenario my kernel is not booting and it just stuck 
> after uncompressing it. If I do not provide mem=128M boot parameter my kernel 
> boots fine, but in that case I can not restrict kernel to use only 128M 
> memory. Why supplying mem=128M causing kernel to fail.
> 

Any kernel option else is configured, such as CONFIG_KERNEL_START and
CONFIG_RELOCATABLE?

Or any u-boot argument is reconfigured, such as 'bootm_low', 'bootm_mapsize' and
'bootm_size'.

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: Kernel not booting when supplying boot parameter mem

2011-12-27 Thread tiejun.chen
Arshad, Farrukh wrote:
> Hi Tiejun,
> 
> Thanks for your response. Yes, I am running two kernels one on each core in 
> SAMP configuration on P1022RDK board. Given is my memory partitioning. Core 0 
> is loading fine but Core 1 is not loading. CONFIG_RELOCATABLE is not set.
> 
> --
> Core  | Base Address  |Size   
> | Uboot parameters
>   | Kernel Configuration  
> |
> -|---|-|--||
> Core 0 (MEL RT Kernel)| 0x,   | 0x1000, - 
> 256 (MB)|bootm_low = 0x,, bootm_size = 0x1000,  | 
> CONFIG_PHYSICAL_START = 0x,, CONFIG_KERNEL_START = 0xC000,
> |
> Core 1 (LTIB Kernel)  |0x1000,| 0x0800, - 
> 128 (MB)|bootm_low = 0x1000,, bootm_size = 0x0800,  | 
> CONFIG_PHYSICAL_START = 0x1000,, CONFIG_KERNEL_START = 0xC000,
> |
> --
> 

Please check if the following commit is already in your kernel:
--
powerpc: Fix memory limits when starting at a non-zero address

memblock_enforce_memory_limit() takes the desired maximum quantity of memory
to end up with, not an address above which memory will not be used.

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: Problem in getting shared memory access on P1022RDK

2012-01-04 Thread tiejun.chen
Arshad, Farrukh wrote:
> How can I verify if the memory mapped is coherent on both cores. My memory 
> partitioning is given below
> 
> Core  Base AddressSize
> Core 00x, 0x1000, --> 
> CONFIG_PHYSICAL_START = bootm_low = Base Address
> Core 10x1000, 0x0C00, --> 
> CONFIG_PHYSICAL_START = bootm_low = Base Address
> Shared Mem0x1C00, 0x0400,

Was the kernel option, CONFIG_SMP, enabled for both two kernels?

CONFIG_SMP would affect the memory attribute for cache coherency. Maybe you
should make sure if kernel have a appropriate memory attribute by dumping TLB 
entry.

Tiejun

> 
> Regards,
> Farrukh Arshad
> 
> -Original Message-
> From: Scott Wood [mailto:scottw...@freescale.com] 
> Sent: Tuesday, January 03, 2012 10:10 PM
> To: Arshad, Farrukh
> Cc: linuxppc-dev@lists.ozlabs.org
> Subject: Re: Problem in getting shared memory access on P1022RDK
> 
> On 01/03/2012 03:42 AM, Arshad, Farrukh wrote:
>> Adding more to it,
>>
>>  
>>
>> When I write from Core 1 on the shared memory region it is visible at 
>> Core 0 and it can read what I have written from Core 1 but when I 
>> write from Core 0 on this shared memory it is not visible on Core 1.
> 
> Is the memory mapped coherent on both cores?
> 
> -Scott
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH v2 0/3] ppc32/kprobe: Fix a bug for kprobe stwu r1

2012-01-10 Thread tiejun.chen
Tiejun Chen wrote:
> Changes from V1:
> 
> * use memcpy simply to withdraw copy_exc_stack
> * add !(regs->msr & MSR_PR)) and
>   WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE));
>   to make sure we're in goot path.
> * move this migration process inside 'restore'
> * clear TIF flag atomically 

Ben,

Is this series OK?

Thanks
Tiejun

> 
> Tiejun Chen (3):
>   powerpc/kprobe: introduce a new thread flag
>   ppc32/kprobe: complete kprobe and migrate exception frame
>   ppc32/kprobe: don't emulate store when kprobe stwu r1
> 
>  arch/powerpc/include/asm/thread_info.h |3 ++
>  arch/powerpc/kernel/entry_32.S |   35 
> 
>  arch/powerpc/lib/sstep.c   |   25 +-
>  3 files changed, 61 insertions(+), 2 deletions(-)
> 
> Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: Problem in getting shared memory access on P1022RDK

2012-01-12 Thread tiejun.chen
Arshad, Farrukh wrote:
> Adding more it, 
> 
> I have removed the shared memory kernel driver dependency just to narrow down 
> the problem area and I have written a small piece of code in user space. A 
> writer & a reader application which access the shared memory and I got the 
> same behavior as with the shared memory kernel driver. Interestingly, my user 
> space application work fine on P1022DS but not on P1022RDK however both using 
> the same CPU modules.
> 
> When I write a simple string on shared memory from Core 1 it is read at Core 
> 0 properly
> When I write a simple string on shared memory from Core 0 it is not read at 
> Core 1. 
>

Did you dump TLB entry to check page memory coherence attribute for a shared
memory as I mentioned previously? This should be consistent on both sides.

> With this test now I am sure the problem lies in the kernel itself. Any 
> pointers to look for the troubled area ?
> 
> My application code is (error checking and other code is omitted)
> 
> #define   SHM_BASE0x1C00
> #define   SHM_SIZE0x40// 4 MB of Shared Memory
> #define   PAGE_SIZE   (4*1024)
> 
> fd = open(device, O_RDWR);

You may need to add with 'O_SYNC'.

Tiejun

> 
> shm = malloc(SHM_SIZE + (PAGE_SIZE - 1));
> if ( (unsigned long) shm % PAGE_SIZE) {
>   shm += PAGE_SIZE - ((unsigned long)shm % PAGE_SIZE);
> }
> 
> shm = mmap(shm, SHM_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 
> SHM_BASE);
> ..
> .. write some string at shm.
> 
> My memory partitioning for both systems is 
> 
> Core  Base AddressSize
> Core 00x, 0x1000,
> Core 10x1000, 0x0C00,
> Shared Memory 0x1C00, 0x0400,
> 
> Regards,
> Farrukh Arshad.
> Mentor Graphics Pakistan

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: Problem in getting shared memory access on P1022RDK

2012-01-28 Thread tiejun.chen
Arshad, Farrukh wrote:
> I have dumped TLB entries while mapping shared memory. On both cores M-Bit 
> (MAS2[61]) is set in TLB0 entries. On both cores M-Bit is set for all valid 
> TLB1 entries. TLB1 does contains some invalid entries which has M-Bit 
> cleared. So I believe at this time the coherency is not the issue. Any 
> further thoughts on the issue ?
>

Did you check all associated TLBx VPN/RPN and attribute setting are same between
two scenarios: W -> Core0 & R <- Core1 and W -> Core1 & R <- Core0?

Can you send me your TLB dump log separately if possible?

And did you try this flag 'O_SYNC'?

Tiejun

> I have modified dump_tlb_book3e function (found in arch/powerpc/xmon/xmon.c) 
> function for BookE MMU to dump TLB entries.
> 
> Regards,
> Farrukh Arshad
> 
> 
> -Original Message-
> From: tiejun.chen [mailto:tiejun.c...@windriver.com] 
> Sent: Thursday, January 12, 2012 1:09 PM
> To: Arshad, Farrukh
> Cc: Scott Wood; linuxppc-dev@lists.ozlabs.org
> Subject: Re: Problem in getting shared memory access on P1022RDK
> 
> Arshad, Farrukh wrote:
>> Adding more it,
>>
>> I have removed the shared memory kernel driver dependency just to narrow 
>> down the problem area and I have written a small piece of code in user 
>> space. A writer & a reader application which access the shared memory and I 
>> got the same behavior as with the shared memory kernel driver. 
>> Interestingly, my user space application work fine on P1022DS but not on 
>> P1022RDK however both using the same CPU modules.
>>
>> When I write a simple string on shared memory from Core 1 it is read 
>> at Core 0 properly When I write a simple string on shared memory from Core 0 
>> it is not read at Core 1.
>>
> 
> Did you dump TLB entry to check page memory coherence attribute for a shared 
> memory as I mentioned previously? This should be consistent on both sides.
> 
>> With this test now I am sure the problem lies in the kernel itself. Any 
>> pointers to look for the troubled area ?
>>
>> My application code is (error checking and other code is omitted)
>>
>> #define  SHM_BASE0x1C00
>> #define  SHM_SIZE0x40// 4 MB of Shared Memory
>> #define  PAGE_SIZE   (4*1024)
>>
>> fd = open(device, O_RDWR);
> 
> You may need to add with 'O_SYNC'.
> 
> Tiejun
> 
>> shm = malloc(SHM_SIZE + (PAGE_SIZE - 1)); if ( (unsigned long) shm % 
>> PAGE_SIZE) {
>>  shm += PAGE_SIZE - ((unsigned long)shm % PAGE_SIZE); }
>>
>> shm = mmap(shm, SHM_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED | 
>> MAP_FIXED, fd, SHM_BASE); ..
>> .. write some string at shm.
>>
>> My memory partitioning for both systems is
>>
>> Core Base AddressSize
>> Core 0   0x, 0x1000,
>> Core 1   0x1000, 0x0C00,
>> Shared Memory0x1C00, 0x0400,
>>
>> Regards,
>> Farrukh Arshad.
>> Mentor Graphics Pakistan
> 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 1/1] powerpc: add "memory" attribute for mfmsr()

2012-07-10 Thread tiejun.chen
On 07/10/2012 04:19 PM, Benjamin Herrenschmidt wrote:
> On Tue, 2012-07-10 at 15:59 +0800, Tiejun Chen wrote:
>> Add "memory" attribute in inline assembly language as a compiler
>> barrier to make sure 4.6.x GCC don't reorder mfmsr().
> 
> Out of curiosity, did you see a case where it was re-ordered
> improperly ?

Yes.

In my scenario, there's one simple wrapper from local_irq_save().
--
uint32_t XX_DisableAllIntr(void)
{
unsigned long flags;

local_irq_save(flags);

return (uint32_t)flags;
}

When CONFIG_TRACE_IRQFLAGS_SUPPORT is enabled, kernel would expand
local_irq_save(). Firstly, please refer to the follows:

#define local_irq_save(flags)   \
do {\
raw_local_irq_save(flags);  \
trace_hardirqs_off();   \
} while (0)

#define raw_local_irq_save(flags)   \
do {\
typecheck(unsigned long, flags);\
flags = arch_local_irq_save();  \
} while (0)

static inline unsigned long arch_local_irq_save(void)
{
unsigned long flags = arch_local_save_flags();
#ifdef CONFIG_BOOKE
asm volatile("wrteei 0" : : : "memory");
#else
SET_MSR_EE(flags & ~MSR_EE);
#endif
return flags;
}

static inline unsigned long arch_local_save_flags(void)
{
return mfmsr();
}

So local_irq_save(flags) is extended as something like:

{
#1 flags = mfmsr();
#2 asm volatile("wrteei 0" : : : "memory");
#3 trace_hardirqs_off();
}

But build kernel with our 5.0 toolchain (with/without
--with-toolchain-dir=wr-toolchain),

(gdb) disassemble XX_DisableAllIntr
Dump of assembler code for function XX_DisableAllIntr:
   0xc04550c0 <+0>: mflrr0
   0xc04550c4 <+4>: stw r0,4(r1)
   0xc04550c8 <+8>: bl  0xc000f060 
   0xc04550cc <+12>:stwur1,-16(r1)
   0xc04550d0 <+16>:mflrr0
   0xc04550d4 <+20>:stw r0,20(r1)
   0xc04550d8 <+24>:wrteei  0
   0xc04550dc <+28>:bl  0xc00c6c10 
   0xc04550e0 <+32>:mfmsr   r3
   0xc04550e4 <+36>:lwz r0,20(r1)
   0xc04550e8 <+40>:mtlrr0
   0xc04550ec <+44>:addir1,r1,16
   0xc04550f0 <+48>:blr
End of assembler dump.

You should notice #2 and #3 is reorganized before #1. This means irq is always
disabled before we check irq status via reading msr. Then kernel would work as
abnormal sometimes.

But with old toolchain (4.5.x) for this same kernel, everything is fine.

Tiejun

> 
> Cheers,
> Ben.
> 
>> Signed-off-by: Tiejun Chen 
>> ---
>>  arch/powerpc/include/asm/reg.h |3 ++-
>>  1 files changed, 2 insertions(+), 1 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
>> index 559da19..578e5a0 100644
>> --- a/arch/powerpc/include/asm/reg.h
>> +++ b/arch/powerpc/include/asm/reg.h
>> @@ -1016,7 +1016,8 @@
>>  /* Macros for setting and retrieving special purpose registers */
>>  #ifndef __ASSEMBLY__
>>  #define mfmsr() ({unsigned long rval; \
>> -asm volatile("mfmsr %0" : "=r" (rval)); rval;})
>> +asm volatile("mfmsr %0" : "=r" (rval) : \
>> +: "memory"); rval;})
>>  #ifdef CONFIG_PPC_BOOK3S_64
>>  #define __mtmsrd(v, l)  asm volatile("mtmsrd %0," __stringify(l) \
>>   : : "r" (v) : "memory")
> 
> 
> 
> 


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 1/1] powerpc: add "memory" attribute for mfmsr()

2012-07-10 Thread tiejun.chen
On 07/10/2012 04:22 PM, tiejun.chen wrote:
> On 07/10/2012 04:19 PM, Benjamin Herrenschmidt wrote:
>> On Tue, 2012-07-10 at 15:59 +0800, Tiejun Chen wrote:
>>> Add "memory" attribute in inline assembly language as a compiler
>>> barrier to make sure 4.6.x GCC don't reorder mfmsr().
>>
>> Out of curiosity, did you see a case where it was re-ordered
>> improperly ?
> 
> Yes.
> 
> In my scenario, there's one simple wrapper from local_irq_save().
> --
> uint32_t XX_DisableAllIntr(void)
> {
> unsigned long flags;
> 
> local_irq_save(flags);
> 
> return (uint32_t)flags;
> }
> 
> When CONFIG_TRACE_IRQFLAGS_SUPPORT is enabled, kernel would expand
> local_irq_save(). Firstly, please refer to the follows:
> 
> #define local_irq_save(flags)   \
> do {\
> raw_local_irq_save(flags);  \
> trace_hardirqs_off();   \
> } while (0)
> 
> #define raw_local_irq_save(flags)   \
> do {\
> typecheck(unsigned long, flags);\
> flags = arch_local_irq_save();  \
> } while (0)
> 
> static inline unsigned long arch_local_irq_save(void)
> {
> unsigned long flags = arch_local_save_flags();
> #ifdef CONFIG_BOOKE
> asm volatile("wrteei 0" : : : "memory");
> #else
> SET_MSR_EE(flags & ~MSR_EE);
> #endif
> return flags;
> }
> 
> static inline unsigned long arch_local_save_flags(void)
> {
> return mfmsr();
> }
> 
> So local_irq_save(flags) is extended as something like:
> 
> {
>   #1 flags = mfmsr();
>   #2 asm volatile("wrteei 0" : : : "memory");
>   #3 trace_hardirqs_off();
> }
> 
> But build kernel with our 5.0 toolchain (with/without

Note here this toolchain is based on 4.6.3.

Tiejun

> --with-toolchain-dir=wr-toolchain),
> 
> (gdb) disassemble XX_DisableAllIntr
> Dump of assembler code for function XX_DisableAllIntr:
>0xc04550c0 <+0>:   mflrr0
>0xc04550c4 <+4>:   stw r0,4(r1)
>0xc04550c8 <+8>:   bl  0xc000f060 
>0xc04550cc <+12>:  stwur1,-16(r1)
>0xc04550d0 <+16>:  mflrr0
>0xc04550d4 <+20>:  stw r0,20(r1)
>0xc04550d8 <+24>:  wrteei  0
>0xc04550dc <+28>:  bl  0xc00c6c10 
>0xc04550e0 <+32>:  mfmsr   r3
>0xc04550e4 <+36>:  lwz r0,20(r1)
>0xc04550e8 <+40>:  mtlrr0
>0xc04550ec <+44>:  addir1,r1,16
>0xc04550f0 <+48>:  blr
> End of assembler dump.
> 
> You should notice #2 and #3 is reorganized before #1. This means irq is always
> disabled before we check irq status via reading msr. Then kernel would work as
> abnormal sometimes.
> 
> But with old toolchain (4.5.x) for this same kernel, everything is fine.
> 
> Tiejun
> 
>>
>> Cheers,
>> Ben.
>>
>>> Signed-off-by: Tiejun Chen 
>>> ---
>>>  arch/powerpc/include/asm/reg.h |3 ++-
>>>  1 files changed, 2 insertions(+), 1 deletions(-)
>>>
>>> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
>>> index 559da19..578e5a0 100644
>>> --- a/arch/powerpc/include/asm/reg.h
>>> +++ b/arch/powerpc/include/asm/reg.h
>>> @@ -1016,7 +1016,8 @@
>>>  /* Macros for setting and retrieving special purpose registers */
>>>  #ifndef __ASSEMBLY__
>>>  #define mfmsr()({unsigned long rval; \
>>> -   asm volatile("mfmsr %0" : "=r" (rval)); rval;})
>>> +   asm volatile("mfmsr %0" : "=r" (rval) : \
>>> +   : "memory"); rval;})
>>>  #ifdef CONFIG_PPC_BOOK3S_64
>>>  #define __mtmsrd(v, l) asm volatile("mtmsrd %0," __stringify(l) \
>>>  : : "r" (v) : "memory")
>>
>>
>>
>>
> 
> 
> ___
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
> 
> 


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 1/1] booke/wdt: fix incorrect WDIOC_GETSUPPORT return path

2012-08-05 Thread tiejun.chen
On 07/30/2012 04:15 PM, Tiejun Chen wrote:
> We miss that correct WDIOC_GETSUPPORT return path when perform
> copy_to_user() properly.

Any comments?

Thanks
Tiejun

> 
> Signed-off-by: Tiejun Chen 
> ---
>  drivers/watchdog/booke_wdt.c |7 ---
>  1 files changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/watchdog/booke_wdt.c b/drivers/watchdog/booke_wdt.c
> index 3fe82d0..2be7f29 100644
> --- a/drivers/watchdog/booke_wdt.c
> +++ b/drivers/watchdog/booke_wdt.c
> @@ -162,12 +162,13 @@ static long booke_wdt_ioctl(struct file *file,
>   unsigned int cmd, unsigned long arg)
>  {
>   u32 tmp = 0;
> - u32 __user *p = (u32 __user *)arg;
> + void __user *argp = (u32 __user *)arg;
> + u32 __user *p = argp;
>  
>   switch (cmd) {
>   case WDIOC_GETSUPPORT:
> - if (copy_to_user((void *)arg, &ident, sizeof(ident)))
> - return -EFAULT;
> + return copy_to_user(argp, &ident,
> + sizeof(ident)) ? -EFAULT : 0;
>   case WDIOC_GETSTATUS:
>   return put_user(0, p);
>   case WDIOC_GETBOOTSTATUS:
> 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 1/1] booke/wdt: fix incorrect WDIOC_GETSUPPORT return path

2012-08-06 Thread tiejun.chen
On 08/07/2012 09:19 AM, Tabi Timur-B04825 wrote:
> On Mon, Aug 6, 2012 at 2:12 PM, Tabi Timur-B04825  
> wrote:
>> On Mon, Jul 30, 2012 at 3:15 AM, Tiejun Chen  
>> wrote:
>>> We miss that correct WDIOC_GETSUPPORT return path when perform
>>> copy_to_user() properly.
>>
>> Thanks for catching this.  I'm amazed that this driver still has bugs like 
>> this.
> 
> While you're at it, I found a few related bugs.  Can you fix these, also?
> 
> 1.case WDIOC_SETOPTIONS:
>   if (get_user(tmp, p))
>   return -EINVAL;
> 
> This should return -EFAULT.
> 
> 2.case WDIOC_GETBOOTSTATUS:
>   /* XXX: something is clearing TSR */
>   tmp = mfspr(SPRN_TSR) & TSR_WRS(3);
>   /* returns CARDRESET if last reset was caused by the WDT */
>   return (tmp ? WDIOF_CARDRESET : 0);
> 
> This should use put_user() to return the value, instead of returning
> it as a return code.
> 
> You can title the new patch something like, "booke/wdt: some ioctls do
> not return values properly"

Will regenerate this patch including these error as v2.

Thanks
Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 2/3] powerpc: Bail out of KGDB when we've been triggered

2012-08-22 Thread tiejun.chen
On 08/22/2012 11:07 PM, Tabi Timur-B04825 wrote:
> On Wed, Aug 22, 2012 at 5:43 AM, Tiejun Chen  
> wrote:
> 
>> +int kgdb_skipexception(int exception, struct pt_regs *regs)
>> +{
>> +   if (kgdb_isremovedbreak(regs->nip))
>> +   return 1;
>> +
>> +   return 0;
>> +}
> 
> int kgdb_skipexception(int exception, struct pt_regs *regs)
> {
> return !!kgdb_isremovedbreak(regs->nip));
> }
> 
> If the caller only cares about zero vs. non-zero, you can drop the !!.

Yes, so 'return kgdb_isremovedbreak(regs->nip);' is already fine and simple.

I'll update this as v2 so thanks your comment.

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v2][PATCH 3/3] powerpc/kgdb: restore current_thread_info properly

2012-08-22 Thread tiejun.chen
On 08/23/2012 11:14 AM, Nicholas A. Bellinger wrote:
> On Thu, 2012-08-23 at 10:10 +0800, Tiejun Chen wrote:
>> For powerpc BooKE and e200, singlestep is handled on the critical/dbg
>> exception stack. This causes current_thread_info() to fail for kgdb
>> internal, so previously We work around this issue by copying
>> the thread_info from the kernel stack before calling kgdb_handle_exception,
>> and copying it back afterwards.
>>
>> But actually we don't do this properly. We should backup current_thread_info
>> then restore that when exit.
>>
>> Signed-off-by: Tiejun Chen 
>> ---
>> v2: fix a typo in patch head description.
>>
>>  arch/powerpc/kernel/kgdb.c |   11 +--
>>  1 files changed, 9 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
>> index 05adb69..c470a40 100644
>> --- a/arch/powerpc/kernel/kgdb.c
>> +++ b/arch/powerpc/kernel/kgdb.c
>> @@ -25,6 +25,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  
>>  /*
>>   * This table contains the mapping between PowerPC hardware trap types, and
>> @@ -153,6 +154,8 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs)
>>  static int kgdb_singlestep(struct pt_regs *regs)
>>  {
>>  struct thread_info *thread_info, *exception_thread_info;
>> +struct thread_info *backup_current_thread_info = \
>> +(struct thread_info *)kmalloc(sizeof(struct thread_info), 
>> GFP_KERNEL);
>>  
> 
> Looks like a rouge '\' in the above assignment..

Remove that :)

Thanks
Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v3][PATCH 2/3] ppc/kprobe: complete kprobe and migrate exception frame

2012-09-12 Thread tiejun.chen

On 09/11/2012 01:51 PM, Benjamin Herrenschmidt wrote:

On Tue, 2012-09-11 at 10:20 +0800, Tiejun Chen wrote:

We can't emulate stwu since that may corrupt current exception stack.
So we will have to do real store operation in the exception return code.

Firstly we'll allocate a trampoline exception frame below the kprobed
function stack and copy the current exception frame to the trampoline.
Then we can do this real store operation to implement 'stwu', and reroute
the trampoline frame to r1 to complete this exception migration.


Ok, so not quite there yet :-)

See below:


Signed-off-by: Tiejun Chen 
---
  arch/powerpc/kernel/entry_32.S |   45 ++--
  arch/powerpc/kernel/entry_64.S |   32 
  2 files changed, 71 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index ead5016..6cfe12f 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -831,19 +831,54 @@ restore_user:
bnel-   load_dbcr0
  #endif

-#ifdef CONFIG_PREEMPT
b   restore

  /* N.B. the only way to get here is from the beq following ret_from_except. */
  resume_kernel:
+   /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
+   CURRENT_THREAD_INFO(r9, r1)
+   lwz r0,TI_FLAGS(r9)
+   andis.  r0,r0,_TIF_EMULATE_STACK_STORE@h
+   beq+1f


So you used r0 to load the TI_FLAGS and immediately clobbered it in
andis. forcing you to re-load them later down. Instead, put them in r8

lwz r8,TI_FLAGS(r9)
andis.  r0,r8,_TIF_*
beq+*


+   addir8,r1,INT_FRAME_SIZE/* Get the kprobed function entry */


Then you put your entry in r8 


I'll update this for 32b and 64b sections.




+   lwz r3,GPR1(r1)
+   subir3,r3,INT_FRAME_SIZE/* dst: Allocate a trampoline exception 
frame */
+   mr  r4,r1   /* src:  current exception frame */
+   li  r5,INT_FRAME_SIZE   /* size: INT_FRAME_SIZE */
+   mr  r1,r3   /* Reroute the trampoline frame to r1 */
+   bl  memcpy  /* Copy from the original to the 
trampoline */


Which you just clobbered... oops :-)

So you need to store that old r1 somewhere fist then retrieve it
after the memcpy call. That or open-code the memcpy to avoid all
the clobbering problems.


Maybe we can use copy_and_flush() since looks copy_and_flush() only clobber r0, 
r6 and LR explicitly.


I'll resync these comments for v4.

Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v3][PATCH 2/3] ppc/kprobe: complete kprobe and migrate exception frame

2012-09-12 Thread tiejun.chen

On 09/12/2012 04:43 PM, Benjamin Herrenschmidt wrote:

On Wed, 2012-09-12 at 16:38 +0800, tiejun.chen wrote:

So you need to store that old r1 somewhere fist then retrieve it
after the memcpy call. That or open-code the memcpy to avoid all
the clobbering problems.


Maybe we can use copy_and_flush() since looks copy_and_flush() only
clobber r0,
r6 and LR explicitly.

I'll resync these comments for v4.


I'd say just open code it. You already have src and dst in registers,
the length can easily be put in ctr... easier that way, not clobbering


ctr should be easier :)


to worry about nor stack frame to create etc...


If you don't like this v4, let me know and then I can go back memcpy for next 
version.


Tiejun
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v3][PATCH 2/3] ppc/kprobe: complete kprobe and migrate exception frame

2012-09-13 Thread tiejun.chen

On 09/12/2012 06:38 PM, Benjamin Herrenschmidt wrote:

On Wed, 2012-09-12 at 16:55 +0800, tiejun.chen wrote:

to worry about nor stack frame to create etc...


If you don't like this v4, let me know and then I can go back memcpy
for next
version.


Just open code the whole copy. It should be easy really. As I said, you
have the src and dst already in registers and you know they are aligned,
so just put the size of the frame in a register (divided by 4), do an
mtctr and do a little load_update/store_update loop to do the copy, all
in the asm.


Is the following Okay?

---
 arch/powerpc/kernel/entry_32.S |   55 +++-
 arch/powerpc/kernel/entry_64.S |   45 
 2 files changed, 94 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index ead5016..3b56bba 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -32,6 +32,7 @@
 #include 
 #include 
 #include 
+#include 

 #undef SHOW_SYSCALLS
 #undef SHOW_SYSCALLS_TASK
@@ -831,19 +832,63 @@ restore_user:
bnel-   load_dbcr0
 #endif

-#ifdef CONFIG_PREEMPT
b   restore

 /* N.B. the only way to get here is from the beq following ret_from_except. */
 resume_kernel:
-   /* check current_thread_info->preempt_count */
+   /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
CURRENT_THREAD_INFO(r9, r1)
+   lwz r8,TI_FLAGS(r9)
+   andis.  r8,r8,_TIF_EMULATE_STACK_STORE@h
+   beq+1f
+
+   addir8,r1,INT_FRAME_SIZE/* Get the kprobed function entry */
+
+   lwz r3,GPR1(r1)
+   subir3,r3,INT_FRAME_SIZE/* dst: Allocate a trampoline exception 
frame */
+   mr  r4,r1   /* src:  current exception frame */
+   li  r5,INT_FRAME_SIZE   /* size: INT_FRAME_SIZE */
+   li  r6,0/* start offset: 0 */
+   mr  r1,r3   /* Reroute the trampoline frame to r1 */
+
+   /* Copy from the original to the trampoline. */
+   addir5,r5,-4
+   addir6,r6,-4
+4: li  r0,L1_CACHE_BYTES/4
+   mtctr   r0
+3: addir6,r6,4 /* copy a cache line */
+   lwzxr0,r6,r4
+   stwxr0,r6,r3
+   bdnz3b
+   dcbst   r6,r3   /* write it to memory */
+   sync
+   cmplw   0,r6,r5
+   blt 4b
+
+   /* Do real store operation to complete stwu */
+   lwz r5,GPR1(r1)
+   stw r8,0(r5)
+
+   /* Clear _TIF_EMULATE_STACK_STORE flag */
+   lis r11,_TIF_EMULATE_STACK_STORE@h
+   addir5,r9,TI_FLAGS
+0: lwarx   r8,0,r5
+   andcr8,r8,r11
+#ifdef CONFIG_IBM405_ERR77
+   dcbt0,r5
+#endif
+   stwcx.  r8,0,r5
+   bne-0b
+1:
+
+#ifdef CONFIG_PREEMPT
+   /* check current_thread_info->preempt_count */
lwz r0,TI_PREEMPT(r9)
cmpwi   0,r0,0  /* if non-zero, just restore regs and return */
bne restore
-   lwz r0,TI_FLAGS(r9)
-   andi.   r0,r0,_TIF_NEED_RESCHED
+   andi.   r8,r8,_TIF_NEED_RESCHED
beq+restore
+   lwz r3,_MSR(r1)
andi.   r0,r3,MSR_EE/* interrupts off? */
beq restore /* don't schedule if so */
 #ifdef CONFIG_TRACE_IRQFLAGS
@@ -864,8 +909,6 @@ resume_kernel:
 */
bl  trace_hardirqs_on
 #endif
-#else
-resume_kernel:
 #endif /* CONFIG_PREEMPT */

/* interrupts are hard-disabled at this point */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index b40e0b4..cc43b64 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -593,6 +593,51 @@ _GLOBAL(ret_from_except_lite)
b   .ret_from_except

 resume_kernel:
+   /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
+   CURRENT_THREAD_INFO(r9, r1)
+   ld  r8,TI_FLAGS(r9)
+   andis.  r8,r8,_TIF_EMULATE_STACK_STORE@h
+   beq+1f
+
+   addir8,r1,INT_FRAME_SIZE/* Get the kprobed function entry */
+
+   lwz r3,GPR1(r1)
+   subir3,r3,INT_FRAME_SIZE/* dst: Allocate a trampoline exception 
frame */
+   mr  r4,r1   /* src:  current exception frame */
+   li  r5,INT_FRAME_SIZE   /* size: INT_FRAME_SIZE */
+   li  r6,0/* start offset: 0 */
+   mr  r1,r3   /* Reroute the trampoline frame to r1 */
+
+   /* Copy from the original to the trampoline. */
+   addir5,r5,-8
+   addir6,r6,-8
+4: li  r0,8
+   mtctr   r0  
+3: addir6,r6,8 /* copy a cache line*/
+   ldx r0,r6,r4
+   stdxr0,r6,r3
+   bdnz3b
+   dcbst   r6,r3   /* write it to memory   */
+   sync
+   cmpld   0,r6,r5
+   blt 4b
+   s

  1   2   >