On 03/17/2013 05:49 AM, Huacai Chen wrote:
This and the next patch resolve memory corruption problems while CPU
hotplug. Without these patches, memory corruption can triggered easily
as below:

On a quad-core MIPS platform, use "spawn" of UnixBench-5.1.3 (http://
code.google.com/p/byte-unixbench/) and a CPU hotplug script like this
(hotplug.sh):
while true; do
echo 0 >/sys/devices/system/cpu/cpu1/online
echo 0 >/sys/devices/system/cpu/cpu2/online
echo 0 >/sys/devices/system/cpu/cpu3/online
sleep 1
echo 1 >/sys/devices/system/cpu/cpu1/online
echo 1 >/sys/devices/system/cpu/cpu2/online
echo 1 >/sys/devices/system/cpu/cpu3/online
sleep 1
done

Run "hotplug.sh" and then run "spawn 10000", spawn will get segfault
after a few minutes.

This patch:
Currently, clear_page()/copy_page() are generated by Micro-assembler
dynamically. But they are unavailable until uasm_resolve_relocs() has
finished because jump labels are illegal before that. Since these
functions are shared by every CPU, we only call build_clear_page()/
build_copy_page() only once at boot time. Without this patch, programs
will get random memory corruption (segmentation fault, bus error, etc.)
while CPU Hotplug (e.g. one CPU is using clear_page() while another is
generating it in cpu_cache_init()).

For similar reasons we modify build_tlb_refill_handler()'s invocation.

V2:
1, Rework the code to make CPU#0 can be online/offline.
2, Introduce cpu_has_local_ebase feature since some types of MIPS CPU
    need a per-CPU tlb_refill_handler().

Signed-off-by: Huacai Chen <che...@lemote.com>
Signed-off-by: Hongbing Hu <h...@lemote.com>

We were seeing the same crashes, this patch set seems to fix the problem.

Acked-by: David Daney <david.da...@cavium.com>

---
  arch/mips/include/asm/cpu-features.h               |    3 +++
  .../asm/mach-loongson/cpu-feature-overrides.h      |    1 +
  arch/mips/mm/page.c                                |   10 ++++++++++
  arch/mips/mm/tlbex.c                               |   10 ++++++++--
  4 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/arch/mips/include/asm/cpu-features.h 
b/arch/mips/include/asm/cpu-features.h
index 1a57e8b..e5ec8fc 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -113,6 +113,9 @@
  #ifndef cpu_has_pindexed_dcache
  #define cpu_has_pindexed_dcache (cpu_data[0].dcache.flags & MIPS_CACHE_PINDEX)
  #endif
+#ifndef cpu_has_local_ebase
+#define cpu_has_local_ebase    1
+#endif

  /*
   * I-Cache snoops remote store.        This only matters on SMP.  Some 
multiprocessors
diff --git a/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h 
b/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h
index 75fd8c0..c0f3ef4 100644
--- a/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h
@@ -57,5 +57,6 @@
  #define cpu_has_vint          0
  #define cpu_has_vtag_icache   0
  #define cpu_has_watch         1
+#define cpu_has_local_ebase    0

  #endif /* __ASM_MACH_LOONGSON_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index a29fba5..4eb8dcf 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -247,6 +247,11 @@ void __cpuinit build_clear_page(void)
        struct uasm_label *l = labels;
        struct uasm_reloc *r = relocs;
        int i;
+       static atomic_t run_once = ATOMIC_INIT(0);
+
+       if (atomic_xchg(&run_once, 1)) {
+               return;
+       }

        memset(labels, 0, sizeof(labels));
        memset(relocs, 0, sizeof(relocs));
@@ -389,6 +394,11 @@ void __cpuinit build_copy_page(void)
        struct uasm_label *l = labels;
        struct uasm_reloc *r = relocs;
        int i;
+       static atomic_t run_once = ATOMIC_INIT(0);
+
+       if (atomic_xchg(&run_once, 1)) {
+               return;
+       }

        memset(labels, 0, sizeof(labels));
        memset(relocs, 0, sizeof(relocs));
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 820e661..6bc28b4 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -2162,8 +2162,11 @@ void __cpuinit build_tlb_refill_handler(void)
        case CPU_TX3922:
        case CPU_TX3927:
  #ifndef CONFIG_MIPS_PGD_C0_CONTEXT
-               build_r3000_tlb_refill_handler();
+               if (cpu_has_local_ebase)
+                       build_r3000_tlb_refill_handler();
                if (!run_once) {
+                       if (!cpu_has_local_ebase)
+                               build_r3000_tlb_refill_handler();
                        build_r3000_tlb_load_handler();
                        build_r3000_tlb_store_handler();
                        build_r3000_tlb_modify_handler();
@@ -2192,9 +2195,12 @@ void __cpuinit build_tlb_refill_handler(void)
                        build_r4000_tlb_load_handler();
                        build_r4000_tlb_store_handler();
                        build_r4000_tlb_modify_handler();
+                       if (!cpu_has_local_ebase)
+                               build_r4000_tlb_refill_handler();
                        run_once++;
                }
-               build_r4000_tlb_refill_handler();
+               if (cpu_has_local_ebase)
+                       build_r4000_tlb_refill_handler();
        }
  }



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to