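Synchronize access to eeh_pe in eeh_dev_check_failure(): hold a reference
on each PE while it is in use (eeh_get_pe()/eeh_put_pe()) and read
pe->parent under the PE lock (eeh_lock_pes()/eeh_unlock_pes()).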

Signed-off-by: Sam Bobroff <sbobr...@linux.ibm.com>
---
 arch/powerpc/kernel/eeh.c | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index eb37cb384ff4..171be70b34d8 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -447,7 +447,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
 int eeh_dev_check_failure(struct eeh_dev *edev)
 {
        int ret;
-       unsigned long flags;
+       unsigned long flags, pe_flags;
        struct device_node *dn;
        struct pci_dev *dev;
        struct eeh_pe *pe, *parent_pe;
@@ -464,7 +464,9 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
                return 0;
        }
        dev = eeh_dev_to_pci_dev(edev);
+       /* TODO: Unsafe until eeh_dev can be synchronized with eeh_pe. */
        pe = eeh_dev_to_pe(edev);
+       eeh_get_pe(pe);
 
        /* Access to IO BARs might get this far and still not want checking. */
        if (!pe) {
@@ -475,6 +477,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
 
        if (!pe->addr && !pe->config_addr) {
                eeh_stats.no_cfg_addr++;
+               eeh_put_pe(pe); /* Release ref */
                return 0;
        }
 
@@ -482,17 +485,21 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
         * On PowerNV platform, we might already have fenced PHB
         * there and we need take care of that firstly.
         */
-       ret = eeh_phb_check_failure(pe);
-       if (ret > 0)
+       ret = eeh_phb_check_failure(pe); /* Acquire ref */
+       if (ret > 0) {
+               eeh_put_pe(pe); /* Release ref */
                return ret;
+       }
 
        /*
         * If the PE isn't owned by us, we shouldn't check the
         * state. Instead, let the owner handle it if the PE has
         * been frozen.
         */
-       if (eeh_pe_passed(pe))
+       if (eeh_pe_passed(pe)) {
+               eeh_put_pe(pe); /* Release ref */
                return 0;
+       }
 
        /* If we already have a pending isolation event for this
         * slot, we know it's bad already, we don't need to check.
@@ -548,7 +555,10 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
         * put into frozen state as well. We should take care
         * that at first.
         */
+       eeh_lock_pes(&pe_flags);
        parent_pe = pe->parent;
+       eeh_get_pe(parent_pe); /* Acquire ref */
+       eeh_unlock_pes(pe_flags);
        while (parent_pe) {
                /* Hit the ceiling ? */
                if (parent_pe->type & EEH_PE_PHB)
@@ -557,15 +567,18 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
                /* Frozen parent PE ? */
                ret = eeh_ops->get_state(parent_pe, NULL);
                if (ret > 0 && !eeh_state_active(ret)) {
+                       eeh_put_pe(pe); /* Release ref */
                        pe = parent_pe;
+                       eeh_get_pe(pe); /* Acquire ref */
                        pr_err("EEH: Failure of PHB#%x-PE#%x will be handled at parent PHB#%x-PE#%x.\n",
                               pe->phb->global_number, pe->addr,
                               pe->phb->global_number, parent_pe->addr);
                }
 
                /* Next parent level */
-               parent_pe = parent_pe->parent;
+               eeh_pe_move_to_parent(&parent_pe);
        }
+       eeh_put_pe(parent_pe); /* Release ref */
 
        eeh_stats.slot_resets++;
 
@@ -582,11 +595,12 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
         */
        pr_debug("EEH: %s: Frozen PHB#%x-PE#%x detected\n",
                __func__, pe->phb->global_number, pe->addr);
-       eeh_send_failure_event(pe);
+       eeh_send_failure_event(pe); /* Give ref */
 
        return 1;
 
 dn_unlock:
+       eeh_put_pe(pe); /* Release ref */
        eeh_serialize_unlock(flags);
        return rc;
 }
-- 
2.22.0.216.g00a2a96fc9

Reply via email to