commit: a8a5bb77ad98f84e3bc415ffcb4953058ca6c37d Author: Mike Pagano <mpagano <AT> gentoo <DOT> org> AuthorDate: Fri Jul 18 11:33:46 2014 +0000 Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org> CommitDate: Fri Jul 18 11:33:46 2014 +0000 URL: http://git.overlays.gentoo.org/gitweb/?p=proj/linux-patches.git;a=commit;h=a8a5bb77
Linux patch 3.4.99 --- 0000_README | 4 + 1098_linux-3.4.99.patch | 797 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 801 insertions(+) diff --git a/0000_README b/0000_README index 3c72076..d5d9d81 100644 --- a/0000_README +++ b/0000_README @@ -431,6 +431,10 @@ Patch: 1097_linux-3.4.98.patch From: http://www.kernel.org Desc: Linux 3.4.98 +Patch: 1098_linux-3.4.99.patch +From: http://www.kernel.org +Desc: Linux 3.4.99 + Patch: 1500_XATTR_USER_PREFIX.patch From: https://bugs.gentoo.org/show_bug.cgi?id=470644 Desc: Support for namespace user.pax.* on tmpfs. diff --git a/1098_linux-3.4.99.patch b/1098_linux-3.4.99.patch new file mode 100644 index 0000000..8bde200 --- /dev/null +++ b/1098_linux-3.4.99.patch @@ -0,0 +1,797 @@ +diff --git a/Makefile b/Makefile +index d277446ee8ee..ed97caf40f71 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,6 +1,6 @@ + VERSION = 3 + PATCHLEVEL = 4 +-SUBLEVEL = 98 ++SUBLEVEL = 99 + EXTRAVERSION = + NAME = Saber-toothed Squirrel + +diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c +index 02aee03e713c..1eef567f177a 100644 +--- a/arch/powerpc/perf/core-book3s.c ++++ b/arch/powerpc/perf/core-book3s.c +@@ -472,7 +472,22 @@ static void power_pmu_read(struct perf_event *event) + } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev); + + local64_add(delta, &event->count); +- local64_sub(delta, &event->hw.period_left); ++ ++ /* ++ * A number of places program the PMC with (0x80000000 - period_left). ++ * We never want period_left to be less than 1 because we will program ++ * the PMC with a value >= 0x800000000 and an edge detected PMC will ++ * roll around to 0 before taking an exception. We have seen this ++ * on POWER8. ++ * ++ * To fix this, clamp the minimum value of period_left to 1. ++ */ ++ do { ++ prev = local64_read(&event->hw.period_left); ++ val = prev - delta; ++ if (val < 1) ++ val = 1; ++ } while (local64_cmpxchg(&event->hw.period_left, prev, val) != prev); + } + + /* +diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c +index be1ef574ce9a..dec49d34c048 100644 +--- a/arch/x86/mm/ioremap.c ++++ b/arch/x86/mm/ioremap.c +@@ -50,6 +50,21 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size, + return err; + } + ++static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, ++ void *arg) ++{ ++ unsigned long i; ++ ++ for (i = 0; i < nr_pages; ++i) ++ if (pfn_valid(start_pfn + i) && ++ !PageReserved(pfn_to_page(start_pfn + i))) ++ return 1; ++ ++ WARN_ONCE(1, "ioremap on RAM pfn 0x%lx\n", start_pfn); ++ ++ return 0; ++} ++ + /* + * Remap an arbitrary physical address space into the kernel virtual + * address space. Needed when the kernel wants to access high addresses +@@ -93,14 +108,11 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, + /* + * Don't allow anybody to remap normal RAM that we're using.. + */ ++ pfn = phys_addr >> PAGE_SHIFT; + last_pfn = last_addr >> PAGE_SHIFT; +- for (pfn = phys_addr >> PAGE_SHIFT; pfn <= last_pfn; pfn++) { +- int is_ram = page_is_ram(pfn); +- +- if (is_ram && pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn))) +- return NULL; +- WARN_ON_ONCE(is_ram); +- } ++ if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL, ++ __ioremap_check_ram) == 1) ++ return NULL; + + /* + * Mappings have to be page-aligned +diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c +index 9bdfcf50a190..93f3034ea85d 100644 +--- a/drivers/acpi/battery.c ++++ b/drivers/acpi/battery.c +@@ -34,6 +34,7 @@ + #include <linux/dmi.h> + #include <linux/slab.h> + #include <linux/suspend.h> ++#include <linux/delay.h> + #include <asm/unaligned.h> + + #ifdef CONFIG_ACPI_PROCFS_POWER +@@ -1055,6 +1056,28 @@ static int battery_notify(struct notifier_block *nb, + return 0; + } + ++/* ++ * Some machines'(E,G Lenovo Z480) ECs are not stable ++ * during boot up and this causes battery driver fails to be ++ * probed due to failure of getting battery information ++ * from EC sometimes. After several retries, the operation ++ * may work. So add retry code here and 20ms sleep between ++ * every retries. ++ */ ++static int acpi_battery_update_retry(struct acpi_battery *battery) ++{ ++ int retry, ret; ++ ++ for (retry = 5; retry; retry--) { ++ ret = acpi_battery_update(battery); ++ if (!ret) ++ break; ++ ++ msleep(20); ++ } ++ return ret; ++} ++ + static int acpi_battery_add(struct acpi_device *device) + { + int result = 0; +@@ -1074,9 +1097,11 @@ static int acpi_battery_add(struct acpi_device *device) + if (ACPI_SUCCESS(acpi_get_handle(battery->device->handle, + "_BIX", &handle))) + set_bit(ACPI_BATTERY_XINFO_PRESENT, &battery->flags); +- result = acpi_battery_update(battery); ++ ++ result = acpi_battery_update_retry(battery); + if (result) + goto fail; ++ + #ifdef CONFIG_ACPI_PROCFS_POWER + result = acpi_battery_add_fs(device); + #endif +diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c +index 52480010156d..739eb0d0d12d 100644 +--- a/drivers/gpu/drm/radeon/rs600.c ++++ b/drivers/gpu/drm/radeon/rs600.c +@@ -539,8 +539,10 @@ int rs600_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr) + return -EINVAL; + } + addr = addr & 0xFFFFFFFFFFFFF000ULL; +- addr |= R600_PTE_VALID | R600_PTE_SYSTEM | R600_PTE_SNOOPED; +- addr |= R600_PTE_READABLE | R600_PTE_WRITEABLE; ++ if (addr != rdev->dummy_page.addr) ++ addr |= R600_PTE_VALID | R600_PTE_READABLE | ++ R600_PTE_WRITEABLE; ++ addr |= R600_PTE_SYSTEM | R600_PTE_SNOOPED; + writeq(addr, ptr + (i * 8)); + return 0; + } +diff --git a/drivers/hwmon/adm1029.c b/drivers/hwmon/adm1029.c +index 80cc465d8ac7..f078baef90fe 100644 +--- a/drivers/hwmon/adm1029.c ++++ b/drivers/hwmon/adm1029.c +@@ -231,6 +231,9 @@ static ssize_t set_fan_div(struct device *dev, + /* Update the value */ + reg = (reg & 0x3F) | (val << 6); + ++ /* Update the cache */ ++ data->fan_div[attr->index] = reg; ++ + /* Write value */ + i2c_smbus_write_byte_data(client, + ADM1029_REG_FAN_DIV[attr->index], reg); +diff --git a/drivers/hwmon/amc6821.c b/drivers/hwmon/amc6821.c +index f600fa1f92e3..1d2bace45287 100644 +--- a/drivers/hwmon/amc6821.c ++++ b/drivers/hwmon/amc6821.c +@@ -715,7 +715,7 @@ static SENSOR_DEVICE_ATTR(temp1_max_alarm, S_IRUGO, + get_temp_alarm, NULL, IDX_TEMP1_MAX); + static SENSOR_DEVICE_ATTR(temp1_crit_alarm, S_IRUGO, + get_temp_alarm, NULL, IDX_TEMP1_CRIT); +-static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO | S_IWUSR, ++static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, + get_temp, NULL, IDX_TEMP2_INPUT); + static SENSOR_DEVICE_ATTR(temp2_min, S_IRUGO | S_IWUSR, get_temp, + set_temp, IDX_TEMP2_MIN); +diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c +index 3bd9bd49ed9a..ac339570a805 100644 +--- a/drivers/usb/serial/cp210x.c ++++ b/drivers/usb/serial/cp210x.c +@@ -159,6 +159,7 @@ static const struct usb_device_id id_table[] = { + { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ + { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ + { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */ ++ { USB_DEVICE(0x1B1C, 0x1C00) }, /* Corsair USB Dongle */ + { USB_DEVICE(0x1BE3, 0x07A6) }, /* WAGO 750-923 USB Service Cable */ + { USB_DEVICE(0x1E29, 0x0102) }, /* Festo CPX-USB */ + { USB_DEVICE(0x1E29, 0x0501) }, /* Festo CMSP */ +diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c +index ed1650fb910d..89b5664aa53b 100644 +--- a/drivers/usb/serial/ftdi_sio.c ++++ b/drivers/usb/serial/ftdi_sio.c +@@ -731,7 +731,8 @@ static struct usb_device_id id_table_combined [] = { + { USB_DEVICE(FTDI_VID, FTDI_ACG_HFDUAL_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_YEI_SERVOCENTER31_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_THORLABS_PID) }, +- { USB_DEVICE(TESTO_VID, TESTO_USB_INTERFACE_PID) }, ++ { USB_DEVICE(TESTO_VID, TESTO_1_PID) }, ++ { USB_DEVICE(TESTO_VID, TESTO_3_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_GAMMA_SCOUT_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_TACTRIX_OPENPORT_13M_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_TACTRIX_OPENPORT_13S_PID) }, +diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h +index 500474c48f4b..106cc16cc6ed 100644 +--- a/drivers/usb/serial/ftdi_sio_ids.h ++++ b/drivers/usb/serial/ftdi_sio_ids.h +@@ -798,7 +798,8 @@ + * Submitted by Colin Leroy + */ + #define TESTO_VID 0x128D +-#define TESTO_USB_INTERFACE_PID 0x0001 ++#define TESTO_1_PID 0x0001 ++#define TESTO_3_PID 0x0003 + + /* + * Mobility Electronics products. +diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c +index 58fcaa5ae548..e1e05bad2be0 100644 +--- a/drivers/usb/serial/option.c ++++ b/drivers/usb/serial/option.c +@@ -1501,6 +1501,8 @@ static const struct usb_device_id option_ids[] = { + .driver_info = (kernel_ulong_t)&net_intf2_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1426, 0xff, 0xff, 0xff), /* ZTE MF91 */ + .driver_info = (kernel_ulong_t)&net_intf2_blacklist }, ++ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1428, 0xff, 0xff, 0xff), /* Telewell TW-LTE 4G v2 */ ++ .driver_info = (kernel_ulong_t)&net_intf2_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1533, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1534, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1535, 0xff, 0xff, 0xff) }, +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index 0b0c03c878e5..f0e4e46867f7 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -2589,10 +2589,11 @@ static void print_daily_error_info(unsigned long arg) + es = sbi->s_es; + + if (es->s_error_count) +- ext4_msg(sb, KERN_NOTICE, "error count: %u", ++ /* fsck newer than v1.41.13 is needed to clean this condition. */ ++ ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u", + le32_to_cpu(es->s_error_count)); + if (es->s_first_error_time) { +- printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d", ++ printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d", + sb->s_id, le32_to_cpu(es->s_first_error_time), + (int) sizeof(es->s_first_error_func), + es->s_first_error_func, +@@ -2606,7 +2607,7 @@ static void print_daily_error_info(unsigned long arg) + printk("\n"); + } + if (es->s_last_error_time) { +- printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d", ++ printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d", + sb->s_id, le32_to_cpu(es->s_last_error_time), + (int) sizeof(es->s_last_error_func), + es->s_last_error_func, +diff --git a/kernel/cpuset.c b/kernel/cpuset.c +index 4b843ace285c..9cb82b9aeb73 100644 +--- a/kernel/cpuset.c ++++ b/kernel/cpuset.c +@@ -1152,7 +1152,13 @@ done: + + int current_cpuset_is_being_rebound(void) + { +- return task_cs(current) == cpuset_being_rebound; ++ int ret; ++ ++ rcu_read_lock(); ++ ret = task_cs(current) == cpuset_being_rebound; ++ rcu_read_unlock(); ++ ++ return ret; + } + + static int update_relax_domain_level(struct cpuset *cs, s64 val) +diff --git a/kernel/rtmutex-debug.h b/kernel/rtmutex-debug.h +index 14193d596d78..ab29b6a22669 100644 +--- a/kernel/rtmutex-debug.h ++++ b/kernel/rtmutex-debug.h +@@ -31,3 +31,8 @@ static inline int debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter, + { + return (waiter != NULL); + } ++ ++static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w) ++{ ++ debug_rt_mutex_print_deadlock(w); ++} +diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c +index a242e691c993..375ae48479fa 100644 +--- a/kernel/rtmutex.c ++++ b/kernel/rtmutex.c +@@ -81,6 +81,47 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) + owner = *p; + } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner); + } ++ ++/* ++ * Safe fastpath aware unlock: ++ * 1) Clear the waiters bit ++ * 2) Drop lock->wait_lock ++ * 3) Try to unlock the lock with cmpxchg ++ */ ++static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock) ++ __releases(lock->wait_lock) ++{ ++ struct task_struct *owner = rt_mutex_owner(lock); ++ ++ clear_rt_mutex_waiters(lock); ++ raw_spin_unlock(&lock->wait_lock); ++ /* ++ * If a new waiter comes in between the unlock and the cmpxchg ++ * we have two situations: ++ * ++ * unlock(wait_lock); ++ * lock(wait_lock); ++ * cmpxchg(p, owner, 0) == owner ++ * mark_rt_mutex_waiters(lock); ++ * acquire(lock); ++ * or: ++ * ++ * unlock(wait_lock); ++ * lock(wait_lock); ++ * mark_rt_mutex_waiters(lock); ++ * ++ * cmpxchg(p, owner, 0) != owner ++ * enqueue_waiter(); ++ * unlock(wait_lock); ++ * lock(wait_lock); ++ * wake waiter(); ++ * unlock(wait_lock); ++ * lock(wait_lock); ++ * acquire(lock); ++ */ ++ return rt_mutex_cmpxchg(lock, owner, NULL); ++} ++ + #else + # define rt_mutex_cmpxchg(l,c,n) (0) + static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) +@@ -88,6 +129,17 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) + lock->owner = (struct task_struct *) + ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); + } ++ ++/* ++ * Simple slow path only version: lock->owner is protected by lock->wait_lock. ++ */ ++static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock) ++ __releases(lock->wait_lock) ++{ ++ lock->owner = NULL; ++ raw_spin_unlock(&lock->wait_lock); ++ return true; ++} + #endif + + /* +@@ -141,6 +193,11 @@ static void rt_mutex_adjust_prio(struct task_struct *task) + */ + int max_lock_depth = 1024; + ++static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) ++{ ++ return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL; ++} ++ + /* + * Adjust the priority chain. Also used for deadlock detection. + * Decreases task's usage by one - may thus free the task. +@@ -149,6 +206,7 @@ int max_lock_depth = 1024; + static int rt_mutex_adjust_prio_chain(struct task_struct *task, + int deadlock_detect, + struct rt_mutex *orig_lock, ++ struct rt_mutex *next_lock, + struct rt_mutex_waiter *orig_waiter, + struct task_struct *top_task) + { +@@ -182,7 +240,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, + } + put_task_struct(task); + +- return deadlock_detect ? -EDEADLK : 0; ++ return -EDEADLK; + } + retry: + /* +@@ -207,13 +265,32 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, + goto out_unlock_pi; + + /* ++ * We dropped all locks after taking a refcount on @task, so ++ * the task might have moved on in the lock chain or even left ++ * the chain completely and blocks now on an unrelated lock or ++ * on @orig_lock. ++ * ++ * We stored the lock on which @task was blocked in @next_lock, ++ * so we can detect the chain change. ++ */ ++ if (next_lock != waiter->lock) ++ goto out_unlock_pi; ++ ++ /* + * Drop out, when the task has no waiters. Note, + * top_waiter can be NULL, when we are in the deboosting + * mode! + */ +- if (top_waiter && (!task_has_pi_waiters(task) || +- top_waiter != task_top_pi_waiter(task))) +- goto out_unlock_pi; ++ if (top_waiter) { ++ if (!task_has_pi_waiters(task)) ++ goto out_unlock_pi; ++ /* ++ * If deadlock detection is off, we stop here if we ++ * are not the top pi waiter of the task. ++ */ ++ if (!detect_deadlock && top_waiter != task_top_pi_waiter(task)) ++ goto out_unlock_pi; ++ } + + /* + * When deadlock detection is off then we check, if further +@@ -229,11 +306,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, + goto retry; + } + +- /* Deadlock detection */ ++ /* ++ * Deadlock detection. If the lock is the same as the original ++ * lock which caused us to walk the lock chain or if the ++ * current lock is owned by the task which initiated the chain ++ * walk, we detected a deadlock. ++ */ + if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { + debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); + raw_spin_unlock(&lock->wait_lock); +- ret = deadlock_detect ? -EDEADLK : 0; ++ ret = -EDEADLK; + goto out_unlock_pi; + } + +@@ -280,11 +362,26 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, + __rt_mutex_adjust_prio(task); + } + ++ /* ++ * Check whether the task which owns the current lock is pi ++ * blocked itself. If yes we store a pointer to the lock for ++ * the lock chain change detection above. After we dropped ++ * task->pi_lock next_lock cannot be dereferenced anymore. ++ */ ++ next_lock = task_blocked_on_lock(task); ++ + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + + top_waiter = rt_mutex_top_waiter(lock); + raw_spin_unlock(&lock->wait_lock); + ++ /* ++ * We reached the end of the lock chain. Stop right here. No ++ * point to go back just to figure that out. ++ */ ++ if (!next_lock) ++ goto out_put_task; ++ + if (!detect_deadlock && waiter != top_waiter) + goto out_put_task; + +@@ -395,8 +492,21 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, + { + struct task_struct *owner = rt_mutex_owner(lock); + struct rt_mutex_waiter *top_waiter = waiter; +- unsigned long flags; ++ struct rt_mutex *next_lock; + int chain_walk = 0, res; ++ unsigned long flags; ++ ++ /* ++ * Early deadlock detection. We really don't want the task to ++ * enqueue on itself just to untangle the mess later. It's not ++ * only an optimization. We drop the locks, so another waiter ++ * can come in before the chain walk detects the deadlock. So ++ * the other will detect the deadlock and return -EDEADLOCK, ++ * which is wrong, as the other waiter is not in a deadlock ++ * situation. ++ */ ++ if (owner == task) ++ return -EDEADLK; + + raw_spin_lock_irqsave(&task->pi_lock, flags); + __rt_mutex_adjust_prio(task); +@@ -417,20 +527,28 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, + if (!owner) + return 0; + ++ raw_spin_lock_irqsave(&owner->pi_lock, flags); + if (waiter == rt_mutex_top_waiter(lock)) { +- raw_spin_lock_irqsave(&owner->pi_lock, flags); + plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); + plist_add(&waiter->pi_list_entry, &owner->pi_waiters); + + __rt_mutex_adjust_prio(owner); + if (owner->pi_blocked_on) + chain_walk = 1; +- raw_spin_unlock_irqrestore(&owner->pi_lock, flags); +- } +- else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) ++ } else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) { + chain_walk = 1; ++ } ++ ++ /* Store the lock on which owner is blocked or NULL */ ++ next_lock = task_blocked_on_lock(owner); + +- if (!chain_walk) ++ raw_spin_unlock_irqrestore(&owner->pi_lock, flags); ++ /* ++ * Even if full deadlock detection is on, if the owner is not ++ * blocked itself, we can avoid finding this out in the chain ++ * walk. ++ */ ++ if (!chain_walk || !next_lock) + return 0; + + /* +@@ -442,8 +560,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, + + raw_spin_unlock(&lock->wait_lock); + +- res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, +- task); ++ res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, ++ next_lock, waiter, task); + + raw_spin_lock(&lock->wait_lock); + +@@ -453,7 +571,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, + /* + * Wake up the next waiter on the lock. + * +- * Remove the top waiter from the current tasks waiter list and wake it up. ++ * Remove the top waiter from the current tasks pi waiter list and ++ * wake it up. + * + * Called with lock->wait_lock held. + */ +@@ -474,10 +593,23 @@ static void wakeup_next_waiter(struct rt_mutex *lock) + */ + plist_del(&waiter->pi_list_entry, ¤t->pi_waiters); + +- rt_mutex_set_owner(lock, NULL); ++ /* ++ * As we are waking up the top waiter, and the waiter stays ++ * queued on the lock until it gets the lock, this lock ++ * obviously has waiters. Just set the bit here and this has ++ * the added benefit of forcing all new tasks into the ++ * slow path making sure no task of lower priority than ++ * the top waiter can steal this lock. ++ */ ++ lock->owner = (void *) RT_MUTEX_HAS_WAITERS; + + raw_spin_unlock_irqrestore(¤t->pi_lock, flags); + ++ /* ++ * It's safe to dereference waiter as it cannot go away as ++ * long as we hold lock->wait_lock. The waiter task needs to ++ * acquire it in order to dequeue the waiter. ++ */ + wake_up_process(waiter->task); + } + +@@ -492,8 +624,8 @@ static void remove_waiter(struct rt_mutex *lock, + { + int first = (waiter == rt_mutex_top_waiter(lock)); + struct task_struct *owner = rt_mutex_owner(lock); ++ struct rt_mutex *next_lock = NULL; + unsigned long flags; +- int chain_walk = 0; + + raw_spin_lock_irqsave(¤t->pi_lock, flags); + plist_del(&waiter->list_entry, &lock->wait_list); +@@ -517,15 +649,15 @@ static void remove_waiter(struct rt_mutex *lock, + } + __rt_mutex_adjust_prio(owner); + +- if (owner->pi_blocked_on) +- chain_walk = 1; ++ /* Store the lock on which owner is blocked or NULL */ ++ next_lock = task_blocked_on_lock(owner); + + raw_spin_unlock_irqrestore(&owner->pi_lock, flags); + } + + WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); + +- if (!chain_walk) ++ if (!next_lock) + return; + + /* gets dropped in rt_mutex_adjust_prio_chain()! */ +@@ -533,7 +665,7 @@ static void remove_waiter(struct rt_mutex *lock, + + raw_spin_unlock(&lock->wait_lock); + +- rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current); ++ rt_mutex_adjust_prio_chain(owner, 0, lock, next_lock, NULL, current); + + raw_spin_lock(&lock->wait_lock); + } +@@ -546,6 +678,7 @@ static void remove_waiter(struct rt_mutex *lock, + void rt_mutex_adjust_pi(struct task_struct *task) + { + struct rt_mutex_waiter *waiter; ++ struct rt_mutex *next_lock; + unsigned long flags; + + raw_spin_lock_irqsave(&task->pi_lock, flags); +@@ -555,12 +688,13 @@ void rt_mutex_adjust_pi(struct task_struct *task) + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + return; + } +- ++ next_lock = waiter->lock; + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + + /* gets dropped in rt_mutex_adjust_prio_chain()! */ + get_task_struct(task); +- rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task); ++ ++ rt_mutex_adjust_prio_chain(task, 0, NULL, next_lock, NULL, task); + } + + /** +@@ -612,6 +746,26 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, + return ret; + } + ++static void rt_mutex_handle_deadlock(int res, int detect_deadlock, ++ struct rt_mutex_waiter *w) ++{ ++ /* ++ * If the result is not -EDEADLOCK or the caller requested ++ * deadlock detection, nothing to do here. ++ */ ++ if (res != -EDEADLOCK || detect_deadlock) ++ return; ++ ++ /* ++ * Yell lowdly and stop the task right here. ++ */ ++ rt_mutex_print_deadlock(w); ++ while (1) { ++ set_current_state(TASK_INTERRUPTIBLE); ++ schedule(); ++ } ++} ++ + /* + * Slow path lock function: + */ +@@ -649,8 +803,10 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, + + set_current_state(TASK_RUNNING); + +- if (unlikely(ret)) ++ if (unlikely(ret)) { + remove_waiter(lock, &waiter); ++ rt_mutex_handle_deadlock(ret, detect_deadlock, &waiter); ++ } + + /* + * try_to_take_rt_mutex() sets the waiter bit +@@ -706,12 +862,49 @@ rt_mutex_slowunlock(struct rt_mutex *lock) + + rt_mutex_deadlock_account_unlock(current); + +- if (!rt_mutex_has_waiters(lock)) { +- lock->owner = NULL; +- raw_spin_unlock(&lock->wait_lock); +- return; ++ /* ++ * We must be careful here if the fast path is enabled. If we ++ * have no waiters queued we cannot set owner to NULL here ++ * because of: ++ * ++ * foo->lock->owner = NULL; ++ * rtmutex_lock(foo->lock); <- fast path ++ * free = atomic_dec_and_test(foo->refcnt); ++ * rtmutex_unlock(foo->lock); <- fast path ++ * if (free) ++ * kfree(foo); ++ * raw_spin_unlock(foo->lock->wait_lock); ++ * ++ * So for the fastpath enabled kernel: ++ * ++ * Nothing can set the waiters bit as long as we hold ++ * lock->wait_lock. So we do the following sequence: ++ * ++ * owner = rt_mutex_owner(lock); ++ * clear_rt_mutex_waiters(lock); ++ * raw_spin_unlock(&lock->wait_lock); ++ * if (cmpxchg(&lock->owner, owner, 0) == owner) ++ * return; ++ * goto retry; ++ * ++ * The fastpath disabled variant is simple as all access to ++ * lock->owner is serialized by lock->wait_lock: ++ * ++ * lock->owner = NULL; ++ * raw_spin_unlock(&lock->wait_lock); ++ */ ++ while (!rt_mutex_has_waiters(lock)) { ++ /* Drops lock->wait_lock ! */ ++ if (unlock_rt_mutex_safe(lock) == true) ++ return; ++ /* Relock the rtmutex and try again */ ++ raw_spin_lock(&lock->wait_lock); + } + ++ /* ++ * The wakeup next waiter path does not suffer from the above ++ * race. See the comments there. ++ */ + wakeup_next_waiter(lock); + + raw_spin_unlock(&lock->wait_lock); +@@ -958,7 +1151,8 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, + return 1; + } + +- ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); ++ /* We enforce deadlock detection for futexes */ ++ ret = task_blocks_on_rt_mutex(lock, waiter, task, 1); + + if (ret && !rt_mutex_owner(lock)) { + /* +diff --git a/kernel/rtmutex.h b/kernel/rtmutex.h +index a1a1dd06421d..f6a1f3c133b1 100644 +--- a/kernel/rtmutex.h ++++ b/kernel/rtmutex.h +@@ -24,3 +24,8 @@ + #define debug_rt_mutex_print_deadlock(w) do { } while (0) + #define debug_rt_mutex_detect_deadlock(w,d) (d) + #define debug_rt_mutex_reset_waiter(w) do { } while (0) ++ ++static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w) ++{ ++ WARN(1, "rtmutex deadlock detected\n"); ++} +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index a494ec317e0a..8897a6f5c408 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -1052,7 +1052,6 @@ void tracing_start(void) + + arch_spin_unlock(&ftrace_max_lock); + +- ftrace_start(); + out: + raw_spin_unlock_irqrestore(&tracing_start_lock, flags); + } +@@ -1068,7 +1067,6 @@ void tracing_stop(void) + struct ring_buffer *buffer; + unsigned long flags; + +- ftrace_stop(); + raw_spin_lock_irqsave(&tracing_start_lock, flags); + if (trace_stop_count++) + goto out; +diff --git a/mm/mempolicy.c b/mm/mempolicy.c +index ee50c256fdc3..5cec36b6e838 100644 +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -1991,7 +1991,6 @@ struct mempolicy *__mpol_dup(struct mempolicy *old) + } else + *new = *old; + +- rcu_read_lock(); + if (current_cpuset_is_being_rebound()) { + nodemask_t mems = cpuset_mems_allowed(current); + if (new->flags & MPOL_F_REBINDING) +@@ -1999,7 +1998,6 @@ struct mempolicy *__mpol_dup(struct mempolicy *old) + else + mpol_rebind_policy(new, &mems, MPOL_REBIND_ONCE); + } +- rcu_read_unlock(); + atomic_set(&new->refcnt, 1); + return new; + }