> + error_type = (i < ARRAY_SIZE(aer_cor_errors))
> + ? aer_cor_errors[i] : "Unknown";
> + pr_err("EEH:AER Error Type: %s\n", error_type);
> + }
> + }
> + }
> +}
> +
> /*
> * This routine captures assorted PCI configuration space data
> * for the indicated PCI device, and puts them into a buffer
> @@ -237,9 +317,11 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev,
> char *buf, size_t len)
> pr_warn("%s\n", buffer);
> }
>
> - /* If AER capable, dump it */
> + /* If AER capable, parse and dump it */
> cap = edev->aer_cap;
> if (cap) {
> + eeh_parse_aer_registers(edev, cap);
> +
> n += scnprintf(buf+n, len-n, "pci-e AER:\n");
> pr_warn("EEH: PCI-E AER capability register set follows:\n");
>
> --
> 2.48.1
>
>
--
Mahesh J Salgaonkar
> +
> /* Retrieve parameters */
> ret = sscanf(buf, "%x:%x:%x:%lx:%lx",
>&pe_no, &type, &func, &addr, &mask);
> --
> 2.39.5
>
>
--
Mahesh J Salgaonkar
> return -EFAULT;
>
> + buf[ret] = '\0';
> +
> /*
>* When PE is NULL the event is a "special" event. Rather than
>* recovering a specific PE it forces the EEH core to scan for failed
> --
> 2.39.5
>
>
--
Mahesh J Salgaonkar
On 2025-05-14 09:54:46 Wed, Mahesh J Salgaonkar wrote:
> On 2025-04-15 12:47:58 Tue, Jiri Slaby (SUSE) wrote:
> > of_node_to_fwnode() is irqdomain's reimplementation of the "officially"
> > defined of_fwnode_handle(). The former is in the process of being
> >
wnode_handle(phb->dn),
> &pseries_msi_domain_info,
> phb->dev_domain);
> if (!phb->msi_domain) {
> --
> 2.49.0
>
>
--
Mahesh J Salgaonkar
rgs_append
> +
> +Note: Additional kernel arguments for fadump with HASH MMU is only
> + supported if the RMA size is greater than 768 MB. If the RMA
> + size is less than 768 MB, the kernel does not export the
> + /sys/kernel/fadump/bootargs_append sysfs node.
Reviewed-by: Mahesh Salgaonkar
Thanks,
-Mahesh.
> +
> Implementation details:
> ---
>
> --
> 2.48.1
>
>
--
Mahesh J Salgaonkar
On 2025-01-20 23:05:00 Mon, Sourabh Jain wrote:
> Commit 683eab94da75bc ("powerpc/fadump: setup additional parameters for
> dump capture kernel") introduced the additional parameter feature in
> fadump for HASH MMU with the understanding that GRUB does not use the
> memory area between 640MB and 76
On 2025-01-08 15:44:56 Wed, Sourabh Jain wrote:
> Commit 59d58189f3d9 ("crash: fix crash memory reserve exceed system
> memory bug") fails crashkernel parsing if the crash size is found to be
> higher than system RAM, which makes the memory_limit adjustment code
> ineffective due to an early exit f
On 2025-01-08 15:44:57 Wed, Sourabh Jain wrote:
> Commit 0ab97169aa05 ("crash_core: add generic function to do
> reservation") added a generic function to reserve crashkernel memory.
> So let's use the same function on powerpc and remove the
> architecture-specific code that essentially does the sa
@@ -196,7 +196,7 @@ static int add_notes_attrs(struct module *mod, const
> struct load_info *info)
> nattr->attr.mode = 0444;
> nattr->size = info->sechdrs[i].sh_size;
> nattr->private = (void *)info->sechdrs[i].sh_addr;
> - nattr->read = sysfs_bin_attr_simple_read;
> + nattr->read_new = sysfs_bin_attr_simple_read;
> ++nattr;
> }
> ++loaded;
>
> --
> 2.47.1
>
>
--
Mahesh J Salgaonkar
On 2024-11-23 21:20:39 Sat, Haren Myneni wrote:
[...]
> +static ssize_t papr_platform_dump_handle_read(struct file *file,
> + char __user *buf, size_t size, loff_t *off)
> +{
> + struct ibm_platform_dump_params *params = file->private_data;
> + u64 total_bytes;
> + s32 fwrc;
On 2024-08-23 10:11:58 Fri, Narayana Murty N wrote:
> VFIO_EEH_PE_INJECT_ERR ioctl is currently failing on pseries
> due to missing implementation of err_inject eeh_ops for pseries.
> This patch implements pseries_eeh_err_inject in eeh_ops/pseries
> eeh_ops. Implements support for injecting MMIO lo
On 2024-03-08 19:08:50 Fri, Michael Ellerman wrote:
> Aneesh Kumar K V writes:
> > On 3/7/24 5:13 PM, Michael Ellerman wrote:
> >> Mahesh Salgaonkar writes:
> >>> nmi_enter()/nmi_exit() touches per cpu variables which can lead to kernel
> >>> crash when invoked during real mode interrupt handling
ectory '/home/michael/linux/.build'
> GEN Makefile
> #
> # configuration written to .config
> #
> make[1]: Leaving directory '/home/michael/linux/.build'
> ld: arch/powerpc/kernel/traps.o:(.toc+0x0): undefined reference to
> `__percpu_embed_first_chunk'
> ld: arch/powerpc/kernel/mce.o:(.toc+0x0): undefined reference to
> `__percpu_embed_first_chunk'
> make[3]: *** [../scripts/Makefile.vmlinux:37: vmlinux] Error 1
>
> I guess because it has CONFIG_JUMP_LABEL=n?
Even with CONFIG_JUMP_LABEL=n it should still work. Let me take a look and
fix this for microwatt build.
Thanks for your review.
-Mahesh.
>
> cheers
--
Mahesh J Salgaonkar
etup_64.c
> > +++ b/arch/powerpc/kernel/setup_64.c
> > @@ -834,6 +834,11 @@ static __init int pcpu_cpu_to_node(int cpu)
> >
> > unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
> > EXPORT_SYMBOL(__per_cpu_offset);
> > +#ifdef CONFIG_JUMP_LABEL
>
> Why this ifdef ? Even when CONFIG_JUMP_LABEL is not selected all this
> should just work fine.
Yes you are right. I overlooked this. Will fix it in next revision.
Thanks for your review.
--
Mahesh J Salgaonkar
hreads);
> + /*
> + * Ideally, nr_cpus=1 can be achieved if each kernel
> + * component does not assume cpu0 is onlined.
> + */
> + if (boot_cpuid != 0 && nr_cpu_ids < 2)
> + set_nr_cpu_ids(2);
> }
> #ifdef CONFIG_SMP
> /* logical cpu id is always 0 on UP kernels */
> --
> 2.31.1
>
--
Mahesh J Salgaonkar
/sys/kernel/reboot/cpu
$ kexec -e
Kexec kernel fails to boot.
Will work on changes to make kexec -e work with nr_cpus in v3.
Thanks for your review.
-Mahesh.
>
> Thanks,
>
> Pingfan
>
> > paca_ptrs[kexec_paca.paca_index] = &kexec_paca;
> >
> > setup_paca(&kexec_paca);
> > diff --git a/arch/powerpc/kexec/file_load_64.c
> > b/arch/powerpc/kexec/file_load_64.c
> > index 110d28bede2a7..42d55a19454a7 100644
> > --- a/arch/powerpc/kexec/file_load_64.c
> > +++ b/arch/powerpc/kexec/file_load_64.c
> > @@ -1027,7 +1027,7 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage
> > *image)
> > *
> > * Returns 0 on success, negative errno on error.
> > */
> > -static int add_node_props(void *fdt, int node_offset, const struct
> > device_node *dn)
> > +int add_node_props(void *fdt, int node_offset, const struct device_node
> > *dn)
> > {
> > int ret = 0;
> > struct property *pp;
> >
> >
--
Mahesh J Salgaonkar
if (size > count) {
> err = -EINVAL;
> goto err_requeue;
> @@ -352,7 +359,7 @@ static int opal_prd_msg_notifier(struct notifier_block
> *nb,
> if (!item)
> return -ENOMEM;
>
> - memcpy(&item->msg, msg->params, msg_size);
> + memcpy(&item->msg.data, msg->params, msg_size);
>
> spin_lock_irqsave(&opal_prd_msg_queue_lock, flags);
> list_add_tail(&item->list, &opal_prd_msg_queue);
> --
> 2.41.0
>
--
Mahesh J Salgaonkar
> > + break;
> > + case RTAS_SLOT_UNISOLATED: /* Isolation error */
> > + rc = -EFAULT;
> > + break;
> > + case RTAS_SLOT_NOT_UNISOLATED: /* Outstanding TCE/PTE */
> > + rc = -EEXIST;
> > + break;
> > + case RTAS_SLOT_NOT_USABLE: /* No usable slot */
> > + rc = -ENODEV;
> > + break;
> > + default:
> > + pr_err("%s: unexpected error %d\n", __func__, rtas_rc);
> > + rc = -ERANGE;
> > + break;
> > }
> > return rc;
> > }
> > +EXPORT_SYMBOL(rtas_generic_errno);
>
> Should be GPL.
Will fix it in next revision.
Thanks for your review.
--
Mahesh J Salgaonkar
On 2023-08-01 16:38:08 Tue, Bjorn Helgaas wrote:
> On Mon, Jul 24, 2023 at 02:25:19PM +0530, Mahesh Salgaonkar wrote:
> > When certain PHB HW failure causes pHyp to recover PHB, it marks the PE
> > state as temporarily unavailable until recovery is complete. This also
> > triggers an EEH handler in
On 2023-07-18 23:19:23 Tue, Michael Ellerman wrote:
> Mahesh J Salgaonkar writes:
> > On 2023-07-17 20:15:53 Mon, Sachin Sant wrote:
> >> Kdump seems to be broken with 6.5 for ppc64le.
> >>
> >> [ 14.200412] systemd[1]: Starting dracut pre-pivot and cleanup
On 2023-07-17 20:15:53 Mon, Sachin Sant wrote:
> Kdump seems to be broken with 6.5 for ppc64le.
>
> [ 14.200412] systemd[1]: Starting dracut pre-pivot and cleanup hook...
> [[0;32m OK [0m] Started dracut pre-pivot and cleanup hook.
> Starting Kdump Vmcore Save Service...
> [ 14.231669] systemd[1]:
On 2023-07-05 11:06:46 Wed, Jordan Niethe wrote:
>
>
> On 26/6/23 5:04 pm, Mahesh Salgaonkar wrote:
> > opal_prd_msg_notifier extracts the opal prd message size from the message
> > header and uses it for allocating opal_prd_msg_queue_item that includes
> > the correct message size to be copied.
On 2023-06-09 12:44:04 Fri, Hari Bathini wrote:
> Invoke ibm,os-term call with rtas_call_unlocked(), without using the
> RTAS spinlock, to avoid deadlock in the unlikely event of a machine
> crash while making an RTAS call.
>
> Signed-off-by: Hari Bathini
> ---
> arch/powerpc/kernel/rtas.c | 4 +
() returns void. You may want to extract the status
from args->rets[0].
Thanks,
-Mahesh.
> } while (rtas_busy_delay_time(status));
>
> if (status != 0)
> --
> 2.40.1
>
--
Mahesh J Salgaonkar
On 2023-02-16 13:38:57 Thu, Athira Rajeev wrote:
> Add a function dt_find_by_name_substr() that returns the child node if
> it matches till first occurrence at "@" of a given name, otherwise NULL.
> This is helpful for cases with node name like: "name@addr". In
> scenarios where nodes are added with
On 2023-01-18 11:14:50 Wed, Athira Rajeev wrote:
> Add a function dt_find_by_name_substr() that returns the child node if
> it matches till first occurrence at "@" of a given name, otherwise NULL.
> This is helpful for cases with node name like: "name@addr". In
> scenarios where nodes are added with
On 2022-11-14 13:26:07 Mon, Ganesh Goudar wrote:
> machine_check_log_err() is not getting called for all
> unrecoverable errors, And we are missing to log the error.
>
> Raise irq work in save_mce_event() for unrecoverable errors,
> So that we log the error from MCE event handling block in
> timer
, I am still around. I am currently looking into the EEH and will be
glad to take over the maintainership of EEH for powerpc. Please feel
free to add me as maintainer for EEH.
Thanks,
--
Mahesh J Salgaonkar
On 2022-04-28 15:47:40 Thu, Bjorn Helgaas wrote:
> On Tue, Apr 26, 2022 at 11:07:39PM +0530, Mahesh Salgaonkar wrote:
> > When certain PHB HW failure causes phyp to recover PHB, it marks the PE
> > state as temporarily unavailable until recovery is complete. This also
> > triggers an EEH handler in
+ return rc;
> +}
> +
> int rpaphp_get_sensor_state(struct slot *slot, int *state)
> {
> int rc;
> int setlevel;
>
> - rc = rtas_get_sensor(DR_ENTITY_SENSE, slot->index, state);
> + rc = __rpaphp_get_sensor_state(slot, state);
>
> if (rc < 0) {
> if (rc == -EFAULT || rc == -EEXIST) {
> @@ -39,8 +134,7 @@ int rpaphp_get_sensor_state(struct slot *slot, int *state)
> dbg("%s: power on slot[%s] failed rc=%d.\n",
> __func__, slot->name, rc);
> } else {
> - rc = rtas_get_sensor(DR_ENTITY_SENSE,
> - slot->index, state);
> + rc = __rpaphp_get_sensor_state(slot, state);
> }
> } else if (rc == -ENODEV)
> info("%s: slot is unusable\n", __func__);
>
>
--
Mahesh J Salgaonkar
of reg_value contains logical cpu id */
> cpu = (be64_to_cpu(reg_entry->reg_value) &
> RTAS_FADUMP_CPU_ID_MASK);
> - if (fdh && !cpumask_test_cpu(cpu, &fdh->online_mask)) {
> + if (fdh && !cpumask_test_cpu(cpu, &fdh->cpu_mask)) {
> RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry);
> continue;
> }
> --
> 2.35.1
>
--
Mahesh J Salgaonkar
+ case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX:
> > + rc = -EBUSY;
> > + break;
> > + default:
> > + err("%s: unexpected RTAS error %d\n", __func__, rtas_rc);
> > + rc = -ERANGE;
> > + break;
> > + }
> > + return rc;
> > +}
>
> These conversions look OK to me.
--
Mahesh J Salgaonkar
) invoke
> rtas_call("get-sensor-state", ...) directly and code whatever special
> behavior is needed there, instead of introducing a new exported API. The
Posted v3 with above approach.
https://lists.ozlabs.org/pipermail/linuxppc-dev/2021-December/237538.html
Thanks for your review.
-Mahesh.
--
Mahesh J Salgaonkar
ange is trying to address. I checked a couple of
> its call sites and it seems like this is going to propagate back into
> the PCI hotplug core which of course doesn't understand RTAS call
> statuses. So this doesn't seem right.
Thanks for pointing it out. I should convert that into an error before
returning. I overlooked this when I moved away from get_sensor_state().
>
> Maybe it would be better to have rpaphp_get_sensor_state() invoke
> rtas_call("get-sensor-state", ...) directly and code whatever special
> behavior is needed there, instead of introducing a new exported API. The
> driver seems to want to deal with the RTAS return values anyway - it's
> implicitly mapping ENODEV, EFAULT, EEXIST from rtas_get_sensor() back to
> -9002, -9000, -9001 respectively.
Sure I will try this.
Thanks,
-Mahesh.
--
Mahesh J Salgaonkar
x 0016:01:00.1: enabling device (0140 -> 0142)
[16623.580241] bnx2x: [bnx2x_io_slot_reset:14375(enP22p1s0f1)]IO slot reset -->
driver unload
[16623.580245] PCI 0016:01:00.1#1: EEH: bnx2x driver reports: 'disconnect'
[16623.580246] EEH: Finished:'slot_reset' with aggregate recovery
state:'disconnect'
[16623.580250] EEH: Unable to recover from failure from PHB#16-PE#1.
Thanks,
-Mahesh.
--
Mahesh J Salgaonkar
On 2021-11-24 22:57:13 Wed, Oliver O'Halloran wrote:
> On Wed, Nov 24, 2021 at 7:45 PM Mahesh J Salgaonkar
> wrote:
> >
> > No it doesn't. We will still do a presence check before the recovery
> > process starts. This patch moves the check after notifying
On 2021-11-24 10:14:30 Wed, Michael Ellerman wrote:
> Mahesh Salgaonkar writes:
> > When certain PHB HW failure causes phyp to recover PHB, it marks the PE
> > state as temporarily unavailable until recovery is complete. This also
> > triggers an EEH handler in Linux which needs to notify drivers,
On 2021-05-07 10:41:46 Fri, Oliver O'Halloran wrote:
> On Fri, May 7, 2021 at 3:43 AM Mahesh Salgaonkar wrote:
> >
> > When certain PHB HW failure causes phyp to recover PHB, it marks the PE
> > state as temporarily unavailable. In this case, per PAPR, rtas call
> > ibm,read-slot-reset-state2 retu
On 2021-10-04 21:02:21 Mon, Aneesh Kumar K.V wrote:
> On 10/4/21 20:41, Sourabh Jain wrote:
> > From: Mahesh Salgaonkar
> >
> > On system with radix support available, early_radix_enabled() starts
> > returning true for a small window (until mmu_early_init_devtree() is
> > called) even when radix
7 ("powerpc/pseries/pci: Add MSI domains")
> Signed-off-by: Cédric Le Goater
Tested-by: Mahesh Salgaonkar
This fixes the issue reported at
https://lore.kernel.org/linuxppc-dev/65f0085f-c6a9-e3ea-4d60-fcf09b7c7...@linux.vnet.ibm.com/T/#u
by Abdul.
Thanks,
--
Mahesh J Salgaonkar
translation. Maybe something like:
>
> echo > /sys/kernel/debug/powerpc/eeh_addr_check
>
> Then in the kernel:
>
> struct resource *r = lookup_resource(mmio_addr);
> void *virt = ioremap_resource(r);
> ret = eeh_check_failure(virt);
> iounmap(virt)
>
> return ret;
>
> A little tedious, but then you can write a selftest :)
Sure, will give a try.
Thanks,
-Mahesh.
--
Mahesh J Salgaonkar
memblock_alloc_node(size,
> + __alignof__(struct slb_entry),
> + cpu_to_node(i));
> + }
> }
> }
>
> --
> 2.23.0
>
--
Mahesh J Salgaonkar
On 2020-10-14 12:18:13 Wed, Aneesh Kumar K.V wrote:
> Even though we use self removing sysfs helper, we still need
> to make sure we do the final kobject delete conditionally.
> sysfs_remove_file_self() will handle parallel calls to remove
> the sysfs attribute file and returns true only in the cal
On 2020-03-11 01:57:10 Wed, Hari Bathini wrote:
> Commit 0962e8004e97 ("powerpc/prom: Scan reserved-ranges node for
> memory reservations") enabled support to parse reserved-ranges DT
> node and reserve kernel memory falling in these ranges for F/W
> purposes. Memory reserved for FADump should not
gistration).
> - */
> - if (begin < ra_end && end > ra_start) {
> - if (begin < ra_start)
> - fadump_release_reserved_area(begin, ra_start);
> - if (end > ra_end)
> - fadump_release_reserved_area(ra_end, end);
> - } else
> - fadump_release_reserved_area(begin, end);
> + if (reserved_mrange_info.mem_range_cnt ==
> + reserved_mrange_info.max_mem_ranges)
> + reserved_mrange_info.mem_range_cnt--;
>
> + ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end);
> + if (ret != 0)
> return;
> - }
>
> /* Get the reserved ranges list in order first. */
> sort_and_merge_mem_ranges(&reserved_mrange_info);
>
--
Mahesh J Salgaonkar
_register(&papr_scm_driver);
> + if (!ret)
> + mce_register_notifier(&mce_ue_nb);
> +
> + return ret;
> +}
> +module_init(papr_scm_init);
> +
> +static void __exit papr_scm_exit(void)
> +{
> + mce_unregister_notifier(&mce_ue_nb);
> + platform_driver_unregister(&papr_scm_driver);
> +}
> +module_exit(papr_scm_exit);
Rest looks good to me.
Reviewed-by: Mahesh Salgaonkar
Thanks,
-Mahesh.
> +
> MODULE_DEVICE_TABLE(of, papr_scm_match);
> MODULE_LICENSE("GPL");
> MODULE_AUTHOR("IBM Corporation");
> --
> 2.25.1
>
--
Mahesh J Salgaonkar
t) - 1;
> >>> evt = this_cpu_ptr(&mce_ue_event_queue[index]);
> >>> + blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
> >> Can we really use a blocking notifier here? I'm not sure that we can.
> >
> > I think we can, do you see any problem?
>
> No it looks okay after better look, sorry for the noise.
>
> Thanks,
> Nick
--
Mahesh J Salgaonkar
On 2020-03-25 20:34:06 Wed, Nicholas Piggin wrote:
> This allows rtas_args to be put on the machine check stack, which
> avoids a lot of complications with re-entrancy deadlocks.
>
> Signed-off-by: Nicholas Piggin
> ---
> arch/powerpc/kernel/setup_64.c | 15 ++-
> 1 file changed, 14
> - A) >= 0x7000) && ((A) < 0x7ff0)) || \
> - (((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16
> + A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \
> + (((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16
>
> static inline struct rtas_error_log *fwnmi_get_errlog(void)
> {
> --
> 2.23.0
>
--
Mahesh J Salgaonkar
= __pa(machine_check_fwnmi) - PHYSICAL_START;
>
> - if (0 == rtas_call(ibm_nmi_register, 2, 1, NULL, system_reset_addr,
> + if (0 == rtas_call(ibm_nmi_register_token, 2, 1, NULL,
> + system_reset_addr,
> machine_check_addr))
> fwnmi_active = 1;
>
> --
> 2.23.0
>
--
Mahesh J Salgaonkar
On 2020-03-25 20:33:59 Wed, Nicholas Piggin wrote:
> Signed-off-by: Nicholas Piggin
> ---
> arch/powerpc/kernel/exceptions-64s.S | 4
> 1 file changed, 4 insertions(+)
>
> diff --git a/arch/powerpc/kernel/exceptions-64s.S
> b/arch/powerpc/kernel/exceptions-64s.S
> index 6a936c9199d6..67cbc
ore_event = true;
> + regs->nip = extable_fixup(entry);
> + disposition = RTAS_DISP_FULLY_RECOVERED;
> + }
> switch (err_sub_type) {
> case MC_ERROR_UE_IFETCH:
> mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH;
> --
> 2.17.2
>
--
Mahesh J Salgaonkar
On 2019-12-11 20:01:18 Wed, Nicholas Piggin wrote:
> Provide facilities to decode machine checks into human readable
> strings, with only sufficient information required to deal with
> them sanely.
>
> The old machine check stuff was over engineered. The philosophy
> here is that OPAL should corre
On 2019-07-16 17:04:38 Tue, Hari Bathini wrote:
> If not all kernel boot memory regions are registered for MPIPL before
> system crashes, try processing the partial crashdump but warn the user
> before proceeding.
>
> Signed-off-by: Hari Bathini
> ---
> arch/powerpc/platforms/powernv/opal-fadump
On 2019-07-16 17:04:16 Tue, Hari Bathini wrote:
> Add a new kernel config option, CONFIG_PRESERVE_FA_DUMP that ensures
> that crash data, from previously crash'ed kernel, is preserved. This
> helps in cases where FADump is not enabled but the subsequent memory
> preserving kernel boot is likely to
On 2019-07-16 17:04:08 Tue, Hari Bathini wrote:
> From: Hari Bathini
>
> Firmware provides architected register state data at the time of crash.
> Process this data and build CPU notes to append to ELF core.
>
> Signed-off-by: Hari Bathini
> Signed-off-by: Vasant Hegde
> ---
> arch/powerpc/ke
is active.\n");
> + fadump_conf->dump_active = 1;
> + opal_fadump_get_config(fadump_conf, r_opal_fdm_active);
> + }
> +
> return 1;
> }
>
--
Mahesh J Salgaonkar
On 2019-07-16 17:03:30 Tue, Hari Bathini wrote:
> Firmware uses 32-bit field for region size while copying/backing-up
> memory during MPIPL. So, the maximum copy size for a region would
> be a page less than 4GB (aligned to pagesize) but FADump capture
> kernel usually needs more memory than that t
On 2019-07-16 17:03:23 Tue, Hari Bathini wrote:
> Make OPAL calls to register and un-register with firmware for MPIPL.
>
> Signed-off-by: Hari Bathini
> ---
> arch/powerpc/platforms/powernv/opal-fadump.c | 71
> +-
> 1 file changed, 69 insertions(+), 2 deletions(-)
>
On 2019-07-16 17:03:15 Tue, Hari Bathini wrote:
> OPAL allows registering address with it in the first kernel and
> retrieving it after MPIPL. Setup kernel metadata and register its
> address with OPAL to use it for processing the crash dump.
>
> Signed-off-by: Hari Bathini
> ---
> arch/powerpc/
dump data is valid. */
> + if ((be16_to_cpu(fdm_active->header.dump_status_flag) ==
> + RTAS_FADUMP_ERROR_FLAG) ||
> + (fdm_active->cpu_state_data.error_flags != 0) ||
> + (fdm_active->rmr_region.error_flags != 0)) {
> + pr_err("Dump taken by platform is not valid\n");
> + return -EINVAL;
> + }
> + if ((fdm_active->rmr_region.bytes_dumped !=
> + fdm_active->rmr_region.source_len) ||
> + !fdm_active->cpu_state_data.bytes_dumped) {
> + pr_err("Dump taken by platform is incomplete\n");
> + return -EINVAL;
> + }
> +
> + /* Validate the fadump crash info header */
> + fdh = __va(fadump_conf->fadumphdr_addr);
> + if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
> + pr_err("Crash info header is not valid.\n");
> + return -EINVAL;
> + }
> +
> + if (!fdm_active->cpu_state_data.bytes_dumped)
> + return -EINVAL;
> +
> + rc = rtas_fadump_build_cpu_notes(fadump_conf);
> + if (rc)
> + return rc;
> +
> + /*
> + * We are done validating dump info and elfcore header is now ready
> + * to be exported. set elfcorehdr_addr so that vmcore module will
> + * export the elfcore header through '/proc/vmcore'.
> + */
> + elfcorehdr_addr = fdh->elfcorehdr_addr;
> +
> + return 0;
> }
>
> static void rtas_fadump_region_show(struct fw_dump *fadump_conf,
> struct seq_file *m)
> {
> - const struct rtas_fadump_mem_struct *fdm_ptr = &fdm;
> + const struct rtas_fadump_mem_struct *fdm_ptr;
> const struct rtas_fadump_section *cpu_data_section;
>
> + if (fdm_active)
> + fdm_ptr = fdm_active;
> + else
> + fdm_ptr = &fdm;
> +
> cpu_data_section = &(fdm_ptr->cpu_state_data);
> seq_printf(m, "CPU :[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n",
> be64_to_cpu(cpu_data_section->destination_address),
> @@ -219,6 +473,12 @@ static void rtas_fadump_region_show(struct fw_dump
> *fadump_conf,
> seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n",
> be64_to_cpu(fdm_ptr->rmr_region.source_len),
> be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped));
> +
> + /* Dump is active. Show reserved area start address. */
> + if (fdm_active) {
> + seq_printf(m, "\nMemory above %#016lx is reserved for saving
> crash dump\n",
> +fadump_conf->reserve_dump_area_start);
> + }
> }
>
> static void rtas_fadump_trigger(struct fadump_crash_info_header *fdh,
> @@ -228,6 +488,7 @@ static void rtas_fadump_trigger(struct
> fadump_crash_info_header *fdh,
> rtas_os_term((char *)msg);
> }
>
> +
> static struct fadump_ops rtas_fadump_ops = {
> .init_fadump_mem_struct = rtas_fadump_init_mem_struct,
> .register_fadump= rtas_fadump_register_fadump,
> @@ -258,6 +519,17 @@ int __init rtas_fadump_dt_scan(struct fw_dump
> *fadump_conf, ulong node)
> fadump_conf->fadump_platform= FADUMP_PLATFORM_PSERIES;
> fadump_conf->fadump_supported = 1;
>
> + /*
> + * The 'ibm,kernel-dump' rtas node is present only if there is
> + * dump data waiting for us.
> + */
> + fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
> + if (fdm_active) {
> + pr_info("Firmware-assisted dump is active.\n");
> + fadump_conf->dump_active = 1;
> + rtas_fadump_get_config(fadump_conf, (void *)__pa(fdm_active));
> + }
> +
> /* Get the sizes required to store dump data for the firmware provided
>* dump sections.
>* For each dump section type supported, a 32bit cell which defines
>
--
Mahesh J Salgaonkar
On 2019-07-16 17:02:38 Tue, Hari Bathini wrote:
> Make RTAS calls to register and un-register for FADump. Also, update
> how fadump_region contents are displayed to provide more information.
>
> Signed-off-by: Hari Bathini
> ---
> arch/powerpc/kernel/fadump-common.h |2
> arch/power
ll which defines
> + * the ID of a supported section followed by two 32 bit cells which
> + * gives the size of the section in bytes.
> + */
> + sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
> + &size);
> +
> + if (!sections)
> + return 1;
> +
> + num_sections = size / (3 * sizeof(u32));
> +
> + for (i = 0; i < num_sections; i++, sections += 3) {
> + u32 type = (u32)of_read_number(sections, 1);
> +
> + switch (type) {
> + case RTAS_FADUMP_CPU_STATE_DATA:
> + fadump_conf->cpu_state_data_size =
> + of_read_ulong(&sections[1], 2);
> + break;
> + case RTAS_FADUMP_HPTE_REGION:
> + fadump_conf->hpte_region_size =
> + of_read_ulong(&sections[1], 2);
> + break;
> + }
> + }
> +
> + return 1;
> +}
>
--
Mahesh J Salgaonkar
On 2019-06-21 06:27:15 Fri, Santosh Sivaraj wrote:
> From: Reza Arbab
>
> Testing my memcpy_mcsafe() work in progress with an injected UE, I get
> an error like this immediately after the function returns:
>
> BUG: Unable to handle kernel data access at 0x7fff84dec8f8
> Faulting instruction addr
_CHECK_HANDLER_WINDUP
> @@ -1238,13 +1219,6 @@ EXC_COMMON_BEGIN(unrecover_mce)
> bl unrecoverable_exception
> b 1b
>
> -EXC_COMMON_BEGIN(mce_return)
> - /* Invoke machine_check_exception to print MCE event and return. */
> - addi r3,r1,STACK_FRAME_OVERHEAD
> - bl machine_check_exception
> - MACHINE_CHECK_HANDLER_WINDUP
> - RFI_TO_KERNEL
> - b .
>
> EXC_REAL_BEGIN(data_access, 0x300, 0x80)
> EXCEPTION_PROLOG_0 PACA_EXGEN
> --
> 2.20.1
>
--
Mahesh J Salgaonkar
On 2019-04-16 16:06:13 Tue, Hari Bathini wrote:
> OPAL loads kernel & initrd at 512MB offset (256MB size), also exported
> as ibm,opal/dump/fw-load-area. So, if boot memory size of FADump is
> less than 768MB, kernel memory to be exported as '/proc/vmcore' would
> be overwritten by f/w while loadin
On 2019-04-16 16:05:06 Tue, Hari Bathini wrote:
> From: Hari Bathini
>
> Firmware provides architected register state data at the time of crash.
> Process this data and build CPU notes to append to ELF core.
>
> Signed-off-by: Hari Bathini
> Signed-off-by: Vasant Hegde
> ---
>
> Changes in v2
From: Mahesh Salgaonkar
Print more information about the mce error, whether it is a hardware or
software error.
Some of the mce errors can be easily categorized as hardware or software
errors e.g. UEs are due to hardware error, whereas an error triggered due to
invalid usage of tlbie is a pure softwar
From: Mahesh Salgaonkar
Also add the cpu number while displaying the mce log. This will help produce
cleaner logs when mce hits on multiple cpus simultaneously.
Before the changes the mce output was:
[ 127.223515] Severe Machine check interrupt [Recovered]
[ 127.223530] NIP [dba80280]:
insert_slb_ent
From: Mahesh Salgaonkar
Currently all machine check errors are printed as severe errors which isn't
correct. Print soft errors as warning instead of severe errors.
Signed-off-by: Mahesh Salgaonkar
---
change in v2:
- Use kernel types i.e. u8, u64 etc.
- Define sync_error as bool.
---
arch/powe
From: Mahesh Salgaonkar
This is a follow up to the patch that fixed misleading print for TLB
multihit due to the wrongly populated mc_err_types[] array. Convert all the
static array initialization to '[x] = val' style for better
readability of array indexing and avoid any further confusion.
Suggeste
From: Mahesh Salgaonkar
On pseries, TLB multihits are reported as D-Cache Multihit. This is because of
the wrongly populated mc_err_types[] array. Per PAPR, TLB error type is 0x04
and mc_err_types[4] points to "D-Cache" instead of "TLB" string. Fixup the
mc_err_types[] array.
Machine check error typ
From: Mahesh Salgaonkar
Print more information about the mce error, whether it is a hardware or
software error.
Some of the mce errors can be easily categorized as hardware or software
errors e.g. UEs are due to hardware error, whereas an error triggered due to
invalid usage of tlbie is a pure softwar
From: Mahesh Salgaonkar
Currently all machine check errors are printed as severe errors which isn't
correct. Print soft errors as warning instead of severe errors.
Signed-off-by: Mahesh Salgaonkar
---
arch/powerpc/include/asm/mce.h| 26 +++---
arch/powerpc/kernel/mce.c |
From: Mahesh Salgaonkar
Also add cpu number while displaying mce log. This will help cleaner logs
when mce hits on multiple cpus simultaneously.
Signed-off-by: Mahesh Salgaonkar
---
arch/powerpc/include/asm/mce.h |2 -
arch/powerpc/kernel/mce.c | 86 -
From: Mahesh Salgaonkar
The kcov instrumentation inside SLB routines causes duplicate SLB entries
to be added, resulting in SLB multihit machine checks.
Disable kcov instrumentation on slb.o
Signed-off-by: Mahesh Salgaonkar
Acked-by: Andrew Donnellan
---
Change in v2:
- Add comment as request
From: Mahesh Salgaonkar
On TOD/TB errors timebase register stops/freezes until HMI error recovery
gets TOD/TB back into running state. On successful recovery, TB starts
running again and udelay() that relies on TB value continues to function
properly. But in case when HMI fails to recover from TO
From: Mahesh Salgaonkar
The kcov instrumentation inside SLB routines causes duplicate SLB entries
to be added, resulting in SLB multihit machine checks.
Disable kcov instrumentation on slb.o
Signed-off-by: Mahesh Salgaonkar
---
arch/powerpc/mm/Makefile |1 +
1 file changed, 1 insertion(+)
On 2019-02-20 12:05:50 Wed, Paul Mackerras wrote:
> This makes the handling of machine check interrupts that occur inside
> a guest simpler and more robust, with less done in assembler code and
> in real mode.
>
> Now, when a machine check occurs inside a guest, we always get the
> machine check e
On 2019-02-20 12:06:23 Wed, Paul Mackerras wrote:
> This adds an "in_guest" parameter to machine_check_print_event_info()
> so that we can avoid trying to translate guest NIP values into
> symbolic form using the host kernel's symbol table.
>
> Signed-off-by: Paul Mackerras
> ---
> arch/powerpc/
From: Mahesh Salgaonkar
opal_power_control_init() depends on opal message notifier to be
initialized, which is done in opal_init()->opal_message_init(). But both
these initializations are called through machine initcalls and it all
depends on the order in which they are called. So far these are call
From: Mahesh Salgaonkar
Now that other platforms also implement a real mode mce handler,
let's consolidate the code by sharing existing powernv machine check
early code. Rename machine_check_powernv_early to
machine_check_common_early and reuse the code.
Signed-off-by: Mahesh Salgaonkar
---
arch
From: Mahesh Salgaonkar
If we get a machine check exception due to SLB errors then dump the
current SLB contents which will be very much helpful in debugging the
root cause of SLB errors. Introduce an exclusive buffer per cpu to hold
faulty SLB entries. In real mode mce handler saves the old SLB
From: Mahesh Salgaonkar
Extract the MCE error details from RTAS extended log and display it to
console.
With this patch you should now see mce logs like below:
[ 142.371818] Severe Machine check interrupt [Recovered]
[ 142.371822] NIP [dca301b8]: init_module+0x1b8/0x338 [bork_kernel
From: Mahesh Salgaonkar
On pseries, as of today the system crashes if we get a machine check
exceptions due to SLB errors. These are soft errors and can be fixed by
flushing the SLBs so the kernel can continue to function instead of
system crash. We do this in real mode before turning on MMU. Otherwi
From: Mahesh Salgaonkar
On pseries, the machine check error details are part of RTAS extended
event log passed under Machine check exception section. This patch adds
the definition of rtas MCE event section and related helper
functions.
Signed-off-by: Mahesh Salgaonkar
---
---
arch/powerpc/inc
This patch series includes some improvement to Machine check handler
for pSeries. This patch series drops the sysctl knob patch that was
proposed in v7. The SLB recovery code now uses flush_and_reload_slb()
from mce_power.c.
Patch 1 defines MCE error event section.
Patch 2 implements a real mode m
From: Mahesh Salgaonkar
On pseries, the machine check error details are part of RTAS extended
event log passed under Machine check exception section. This patch adds
the definition of rtas MCE event section and related helper
functions.
Signed-off-by: Mahesh Salgaonkar
---
---
arch/powerpc/inc
From: Mahesh Salgaonkar
Now that other platforms also implement a real mode mce handler,
let's consolidate the code by sharing existing powernv machine check
early code. Rename machine_check_powernv_early to
machine_check_common_early and reuse the code.
Signed-off-by: Mahesh Salgaonkar
---
arch
From: Mahesh Salgaonkar
If we get a machine check exception due to SLB errors, then dump the
current SLB contents, which will be very helpful in debugging the
root cause of SLB errors. Introduce an exclusive buffer per cpu to hold
faulty SLB entries. In real mode mce handler saves the old SLB
From: Mahesh Salgaonkar
Extract the MCE error details from RTAS extended log and display it to
console.
With this patch you should now see mce logs like below:
[ 142.371818] Severe Machine check interrupt [Recovered]
[ 142.371822] NIP [dca301b8]: init_module+0x1b8/0x338 [bork_kernel
From: Mahesh Salgaonkar
On pseries, as of today the system crashes if we get a machine check
exception due to SLB errors. These are soft errors and can be fixed by
flushing the SLBs so the kernel can continue to function instead of
system crash. We do this in real mode before turning on MMU. Otherwi
This patch series includes some improvement to Machine check handler
for pSeries. First 3 patches from v7 revision are already in powerpc next.
Posting rest of the patches with review comments. This patch series drops
the sysctl knob patch that was proposed in v7. The SLB recovery code now
uses flu
From: Mahesh Salgaonkar
With the powerpc next commit e7e81847478 (powerpc/mce: Fix SLB rebolting
during MCE recovery path.), the SLB error recovery is broken. The new
change now does not add index value to RB[52-63] that selects the SLB
entry while rebolting, instead it assumes that the shadow sa
From: Mahesh Salgaonkar
With the powerpc next commit e7e81847478 (powerpc/mce: Fix SLB rebolting
during MCE recovery path.), the SLB error recovery is broken. The new
change now does not add index value to RB[52-63] that selects the SLB
entry while rebolting, instead it assumes that the shadow sav
From: Mahesh Salgaonkar
For fadump to work successfully there should not be any holes in reserved
memory ranges where kernel has asked firmware to move the content of old
kernel memory in event of crash. Now that fadump uses CMA for reserved
area, this memory area is now not protected from hot-re
From: Mahesh Salgaonkar
fadump fails to register when there are holes in the reserved memory area.
This can happen if the user has hot-removed memory that falls in the fadump
reserved memory area. Throw a meaningful error message to the user in
such case.
Signed-off-by: Mahesh Salgaonkar
---
arch/po
From: Mahesh Salgaonkar
One of the primary issues with Firmware Assisted Dump (fadump) on Power
is that it needs a large amount of memory to be reserved. On large
systems with TeraBytes of memory, this reservation can be quite
significant.
In some cases, fadump fails if the memory reserved is in
One of the primary issues with Firmware Assisted Dump (fadump) on Power
is that it needs a large amount of memory to be reserved. This reserved
memory is used for saving the contents of old crashed kernel's memory before
fadump capture kernel uses old kernel's memory area to boot. However, this
res
1 - 100 of 457 matches
Mail list logo