This patch increments the device_node reference counter when an EEH error 
occurs and decrements the counter when the event has been handled.  This is to 
prevent the device_node from being released until eeh_event_handler() has had a 
chance to deal with the event.  We've seen cases where the device_node is 
released too soon when an EEH event occurs during a DLPAR remove, causing the 
event handler to attempt to access memory that has already been freed.

Please review and let me know of any concerns.

Signed-off-by: Mike Mason <mm...@us.ibm.com>
--- a/arch/powerpc/platforms/pseries/eeh_event.c        2008-10-09 15:13:53.000000000 -0700
+++ b/arch/powerpc/platforms/pseries/eeh_event.c        2009-07-14 14:14:00.000000000 -0700
@@ -75,6 +75,14 @@ static int eeh_event_handler(void * dumm
        if (event == NULL)
                return 0;

+       /* EEH holds a reference to the device_node, so if it
+        * equals 1 it's no longer valid and the event should
+        * be ignored */
+       if (atomic_read(&event->dn->kref.refcount) == 1) {
+               of_node_put(event->dn);
+               return 0;
+       }
+
        /* Serialize processing of EEH events */
        mutex_lock(&eeh_event_mutex);
        eeh_mark_slot(event->dn, EEH_MODE_RECOVERING);
@@ -86,6 +94,7 @@ static int eeh_event_handler(void * dumm

        eeh_clear_slot(event->dn, EEH_MODE_RECOVERING);
        pci_dev_put(event->dev);
+       of_node_put(event->dn);
        kfree(event);
        mutex_unlock(&eeh_event_mutex);

@@ -140,7 +149,7 @@ int eeh_send_failure_event (struct devic
        if (dev)
                pci_dev_get(dev);

-       event->dn = dn;
+       event->dn = of_node_get(dn);
        event->dev = dev;

        /* We may or may not be called in an interrupt context */


_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to