Linus Torvalds (4):
      lockref: add 'lockref_get_or_lock()' helper
      vfs: reimplement d_rcu_to_refcount() using lockref_get_or_lock()
      lockref: uninline lockref helper functions
      lockref: implement lockless reference count updates using cmpxchg()

Sedat Dilek (6):
      kbuild: deb-pkg: Try to determine distribution
      kbuild: deb-pkg: Bump year in debian/copyright file
      kbuild: deb-pkg: Update git repository URL in debian/copyright file
      Merge branch 'deb-pkg-3.10-fixes' into 3.11.0-1-lockref-small
      Merge branch 'locked-reference-counts' into 3.11.0-1-lockref-small
      Merge branch 'lockref-3.12-fixes' into 3.11.0-1-lockref-small

Tony Luck (1):
      lockref: Relax in cmpxchg loop

Waiman Long (1):
      vfs: use lockref_get_not_zero() for optimistic lockless dget_parent()

 arch/x86/Kconfig                |   1 +
 arch/x86/include/asm/spinlock.h |   5 ++
 fs/dcache.c                     |  17 +++++-
 fs/namei.c                      |  90 ++++++++++++++++++++--------
 include/linux/dcache.h          |  22 -------
 include/linux/lockref.h         |  61 ++++---------------
 lib/Kconfig                     |  10 ++++
 lib/Makefile                    |   1 +
 lib/lockref.c                   | 128 ++++++++++++++++++++++++++++++++++++++++
 scripts/package/builddeb        |  19 +++++-
 10 files changed, 254 insertions(+), 100 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b32ebf9..67e0074 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -16,6 +16,7 @@ config X86_64
 	def_bool y
 	depends on 64BIT
 	select X86_DEV_DMA_OPS
+	select ARCH_USE_CMPXCHG_LOCKREF
 
 ### Arch settings
 config X86
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index e3ddd7d..e0e6684 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -34,6 +34,11 @@
 # define UNLOCK_LOCK_PREFIX
 #endif
 
+static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+	return lock.tickets.head == lock.tickets.tail;
+}
+
 /*
  * Ticket locks are conceptually two parts, one indicating the current head of
  * the queue, and the other indicating the current tail. The lock is acquired
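
A note on the helper above: an x86 ticket lock is free exactly when the
"now serving" (head) counter has caught up with the "next ticket" (tail)
counter, so the lockref fast path can test a sampled lock value for
"unlocked" without touching the cacheline again. A minimal userspace
model of that check (hypothetical names, not the kernel code):

    #include <stdio.h>

    /* Simplified ticket lock: 'head' is the ticket being served,
     * 'tail' is the next ticket to be handed out. */
    struct ticket_lock { unsigned short head, tail; };

    static int value_unlocked(struct ticket_lock lock)
    {
            return lock.head == lock.tail;
    }

    int main(void)
    {
            struct ticket_lock free_lock = { 3, 3 };  /* served == next: free */
            struct ticket_lock held_lock = { 3, 4 };  /* one holder */

            printf("%d %d\n", value_unlocked(free_lock),
                              value_unlocked(held_lock));
            return 0;
    }
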
diff --git a/fs/dcache.c b/fs/dcache.c
index b949af8..96655f4 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -611,8 +611,23 @@ static inline void __dget(struct dentry *dentry)
 
 struct dentry *dget_parent(struct dentry *dentry)
 {
+	int gotref;
 	struct dentry *ret;
 
+	/*
+	 * Do optimistic parent lookup without any
+	 * locking.
+	 */
+	rcu_read_lock();
+	ret = ACCESS_ONCE(dentry->d_parent);
+	gotref = lockref_get_not_zero(&ret->d_lockref);
+	rcu_read_unlock();
+	if (likely(gotref)) {
+		if (likely(ret == ACCESS_ONCE(dentry->d_parent)))
+			return ret;
+		dput(ret);
+	}
+
 repeat:
 	/*
 	 * Don't need rcu_dereference because we re-check it was correct under
@@ -1771,7 +1786,7 @@ static noinline enum slow_d_compare slow_dentry_cmp(
  * without taking d_lock and checking d_seq sequence count against @seq
  * returned here.
  *
- * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
+ * A refcount may be taken on the found dentry with the d_rcu_to_refcount
  * function.
  *
  * Alternatively, __d_lookup_rcu may be called again to look up the child of
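
The dget_parent() change follows a pattern worth spelling out: read the
parent pointer under RCU with no locks, take a reference only if the
count is already non-zero, then re-read the pointer to confirm the
parent did not change in the meantime (dropping the reference if it
did). The count test is what lockref_get_not_zero() provides; a small
userspace sketch of that primitive's semantics using C11 atomics (an
illustration, not the kernel implementation):

    #include <stdatomic.h>
    #include <stdbool.h>

    struct obj { atomic_int refcount; };

    /* Take a reference only if the object is still live (count > 0).
     * A zero count means the object may already be past its final
     * put, and must not be resurrected. */
    static bool get_not_zero(struct obj *o)
    {
            int c = atomic_load(&o->refcount);

            while (c > 0) {
                    /* On failure, 'c' is reloaded with the current count. */
                    if (atomic_compare_exchange_weak(&o->refcount, &c, c + 1))
                            return true;
            }
            return false;
    }
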
diff --git a/fs/namei.c b/fs/namei.c
index 7720fbd..2c30c84 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -494,6 +494,50 @@ static inline void unlock_rcu_walk(void)
 	br_read_unlock(&vfsmount_lock);
 }
 
+/*
+ * When we move over from the RCU domain to properly refcounted
+ * long-lived dentries, we need to check the sequence numbers
+ * we got before lookup very carefully.
+ *
+ * We cannot blindly increment a dentry refcount - even if it
+ * is not locked - if it is zero, because it may have gone
+ * through the final d_kill() logic already.
+ *
+ * So for a zero refcount, we need to get the spinlock (which is
+ * safe even for a dead dentry because the de-allocation is
+ * RCU-delayed), and check the sequence count under the lock.
+ *
+ * Once we have checked the sequence count, we know it is live,
+ * and since we hold the spinlock it cannot die from under us.
+ *
+ * In contrast, if the reference count wasn't zero, we can just
+ * increment the lockref without having to take the spinlock.
+ * Even if the sequence number ends up being stale, we haven't
+ * gone through the final dput() and killed the dentry yet.
+ */
+static inline int d_rcu_to_refcount(struct dentry *dentry, seqcount_t *validate, unsigned seq)
+{
+	int gotref;
+
+	gotref = lockref_get_or_lock(&dentry->d_lockref);
+
+	/* Does the sequence number still match? */
+	if (read_seqcount_retry(validate, seq)) {
+		if (gotref)
+			dput(dentry);
+		else
+			spin_unlock(&dentry->d_lock);
+		return -ECHILD;
+	}
+
+	/* Get the ref now, if we couldn't get it originally */
+	if (!gotref) {
+		dentry->d_lockref.count++;
+		spin_unlock(&dentry->d_lock);
+	}
+	return 0;
+}
+
 /**
  * unlazy_walk - try to switch to ref-walk mode.
  * @nd: nameidata pathwalk data
@@ -518,29 +562,28 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
 				nd->root.dentry != fs->root.dentry)
 			goto err_root;
 	}
-	spin_lock(&parent->d_lock);
+
+	/*
+	 * For a negative lookup, the lookup sequence point is the parent's
+	 * sequence point, and it only needs to revalidate the parent dentry.
+	 *
+	 * For a positive lookup, we need to move both the parent and the
+	 * dentry from the RCU domain to be properly refcounted. And the
+	 * sequence number in the dentry validates *both* dentry counters,
+	 * since we checked the sequence number of the parent after we got
+	 * the child sequence number. So we know the parent must still
+	 * be valid if the child sequence number is still valid.
+	 */
 	if (!dentry) {
-		if (!__d_rcu_to_refcount(parent, nd->seq))
-			goto err_parent;
+		if (d_rcu_to_refcount(parent, &parent->d_seq, nd->seq) < 0)
+			goto err_root;
 		BUG_ON(nd->inode != parent->d_inode);
 	} else {
-		if (dentry->d_parent != parent)
+		if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0)
+			goto err_root;
+		if (d_rcu_to_refcount(parent, &dentry->d_seq, nd->seq) < 0)
 			goto err_parent;
-		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-		if (!__d_rcu_to_refcount(dentry, nd->seq))
-			goto err_child;
-		/*
-		 * If the sequence check on the child dentry passed, then
-		 * the child has not been removed from its parent. This
-		 * means the parent dentry must be valid and able to take
-		 * a reference at this point.
-		 */
-		BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
-		BUG_ON(!parent->d_lockref.count);
-		parent->d_lockref.count++;
-		spin_unlock(&dentry->d_lock);
 	}
-	spin_unlock(&parent->d_lock);
 	if (want_root) {
 		path_get(&nd->root);
 		spin_unlock(&fs->lock);
@@ -551,10 +594,8 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
 	nd->flags &= ~LOOKUP_RCU;
 	return 0;
 
-err_child:
-	spin_unlock(&dentry->d_lock);
 err_parent:
-	spin_unlock(&parent->d_lock);
+	dput(dentry);
 err_root:
 	if (want_root)
 		spin_unlock(&fs->lock);
@@ -585,14 +626,11 @@ static int complete_walk(struct nameidata *nd)
 		nd->flags &= ~LOOKUP_RCU;
 		if (!(nd->flags & LOOKUP_ROOT))
 			nd->root.mnt = NULL;
-		spin_lock(&dentry->d_lock);
-		if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
-			spin_unlock(&dentry->d_lock);
+
+		if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0) {
 			unlock_rcu_walk();
 			return -ECHILD;
 		}
-		BUG_ON(nd->inode != dentry->d_inode);
-		spin_unlock(&dentry->d_lock);
 		mntget(nd->path.mnt);
 		unlock_rcu_walk();
 	}
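
All three d_rcu_to_refcount() call sites above rely on the seqcount
retry idiom: a writer makes the counter odd while it updates and even
again when done, and a reader throws away whatever it read if the
counter was odd or changed across the read. A self-contained userspace
model of the protocol (names made up; the kernel's seqcount_t adds the
required memory barriers and lockdep annotations on top):

    #include <stdatomic.h>

    typedef struct { atomic_uint sequence; } seqcount_model;

    /* Reader: wait out any in-progress writer (odd count), then
     * remember the value to validate against later. */
    static unsigned read_begin(seqcount_model *s)
    {
            unsigned seq;

            while ((seq = atomic_load(&s->sequence)) & 1)
                    ;  /* writer in progress */
            return seq;
    }

    /* Reader: non-zero means a writer ran; retry the whole read. */
    static int read_retry(seqcount_model *s, unsigned seq)
    {
            return atomic_load(&s->sequence) != seq;
    }

    /* Writer: odd while updating, even again when finished. */
    static void write_begin(seqcount_model *s) { atomic_fetch_add(&s->sequence, 1u); }
    static void write_end(seqcount_model *s) { atomic_fetch_add(&s->sequence, 1u); }
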
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index efdc944..9169b91 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -304,28 +304,6 @@ extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *);
 extern struct dentry *__d_lookup_rcu(const struct dentry *parent,
 				const struct qstr *name, unsigned *seq);
 
-/**
- * __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok
- * @dentry: dentry to take a ref on
- * @seq: seqcount to verify against
- * Returns: 0 on failure, else 1.
- *
- * __d_rcu_to_refcount operates on a dentry,seq pair that was returned
- * by __d_lookup_rcu, to get a reference on an rcu-walk dentry.
- */
-static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq)
-{
-	int ret = 0;
-
-	assert_spin_locked(&dentry->d_lock);
-	if (!read_seqcount_retry(&dentry->d_seq, seq)) {
-		ret = 1;
-		dentry->d_lockref.count++;
-	}
-
-	return ret;
-}
-
 static inline unsigned d_count(const struct dentry *dentry)
 {
 	return dentry->d_lockref.count;
diff --git a/include/linux/lockref.h b/include/linux/lockref.h
index 01233e0..ca07b50 100644
--- a/include/linux/lockref.h
+++ b/include/linux/lockref.h
@@ -17,55 +17,20 @@
 #include <linux/spinlock.h>
 
 struct lockref {
-	spinlock_t lock;
-	unsigned int count;
+	union {
+#ifdef CONFIG_CMPXCHG_LOCKREF
+		aligned_u64 lock_count;
+#endif
+		struct {
+			spinlock_t lock;
+			unsigned int count;
+		};
+	};
 };
 
-/**
- * lockref_get - Increments reference count unconditionally
- * @lockcnt: pointer to lockref structure
- *
- * This operation is only valid if you already hold a reference
- * to the object, so you know the count cannot be zero.
- */
-static inline void lockref_get(struct lockref *lockref)
-{
-	spin_lock(&lockref->lock);
-	lockref->count++;
-	spin_unlock(&lockref->lock);
-}
-
-/**
- * lockref_get_not_zero - Increments count unless the count is 0
- * @lockcnt: pointer to lockref structure
- * Return: 1 if count updated successfully or 0 if count is 0
- */
-static inline int lockref_get_not_zero(struct lockref *lockref)
-{
-	int retval = 0;
-
-	spin_lock(&lockref->lock);
-	if (lockref->count) {
-		lockref->count++;
-		retval = 1;
-	}
-	spin_unlock(&lockref->lock);
-	return retval;
-}
-
-/**
- * lockref_put_or_lock - decrements count unless count <= 1 before decrement
- * @lockcnt: pointer to lockref structure
- * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken
- */
-static inline int lockref_put_or_lock(struct lockref *lockref)
-{
-	spin_lock(&lockref->lock);
-	if (lockref->count <= 1)
-		return 0;
-	lockref->count--;
-	spin_unlock(&lockref->lock);
-	return 1;
-}
+extern void lockref_get(struct lockref *);
+extern int lockref_get_not_zero(struct lockref *);
+extern int lockref_get_or_lock(struct lockref *);
+extern int lockref_put_or_lock(struct lockref *);
 
 #endif /* __LINUX_LOCKREF_H */
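
A caller-side sketch of the contract these four functions now export,
with a hypothetical object type ('struct foo' and foo_destroy() are
made up). The subtle part is the *_or_lock variants: a return of 0
means the count was not modified and the caller now holds the spinlock:

    struct foo {
            struct lockref ref;
            /* ... payload ... */
    };

    static void foo_put(struct foo *foo)
    {
            /* Fast path: count was > 1 and was decremented locklessly. */
            if (lockref_put_or_lock(&foo->ref))
                    return;

            /* Slow path: count was <= 1, nothing was decremented, and
             * foo->ref.lock is held, so teardown is race-free. */
            foo->ref.count = 0;
            spin_unlock(&foo->ref.lock);
            foo_destroy(foo);
    }
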
diff --git a/lib/Kconfig b/lib/Kconfig
index 71d9f81..6556171 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -48,6 +48,16 @@ config STMP_DEVICE
 config PERCPU_RWSEM
 	boolean
 
+config ARCH_USE_CMPXCHG_LOCKREF
+	bool
+
+config CMPXCHG_LOCKREF
+	def_bool y if ARCH_USE_CMPXCHG_LOCKREF
+	depends on SMP
+	depends on !GENERIC_LOCKBREAK
+	depends on !DEBUG_SPINLOCK
+	depends on !DEBUG_LOCK_ALLOC
+
 config CRC_CCITT
 	tristate "CRC-CCITT functions"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index 7baccfd..f2cb308 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -20,6 +20,7 @@ lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
 
 lib-y	+= kobject.o klist.o
+obj-y	+= lockref.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
diff --git a/lib/lockref.c b/lib/lockref.c
new file mode 100644
index 0000000..9d76f40
--- /dev/null
+++ b/lib/lockref.c
@@ -0,0 +1,128 @@
+#include <linux/export.h>
+#include <linux/lockref.h>
+
+#ifdef CONFIG_CMPXCHG_LOCKREF
+
+/*
+ * Note that the "cmpxchg()" reloads the "old" value for the
+ * failure case.
+ */
+#define CMPXCHG_LOOP(CODE, SUCCESS) do {					\
+	struct lockref old;							\
+	BUILD_BUG_ON(sizeof(old) != 8);						\
+	old.lock_count = ACCESS_ONCE(lockref->lock_count);			\
+	while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) {  	\
+		struct lockref new = old, prev = old;				\
+		CODE								\
+		old.lock_count = cmpxchg(&lockref->lock_count,			\
+					 old.lock_count, new.lock_count);	\
+		if (likely(old.lock_count == prev.lock_count)) {		\
+			SUCCESS;						\
+		}								\
+		cpu_relax();							\
+	}									\
+} while (0)
+
+#else
+
+#define CMPXCHG_LOOP(CODE, SUCCESS) do { } while (0)
+
+#endif
+
+/**
+ * lockref_get - Increments reference count unconditionally
+ * @lockref: pointer to lockref structure
+ *
+ * This operation is only valid if you already hold a reference
+ * to the object, so you know the count cannot be zero.
+ */
+void lockref_get(struct lockref *lockref)
+{
+	CMPXCHG_LOOP(
+		new.count++;
+	,
+		return;
+	);
+
+	spin_lock(&lockref->lock);
+	lockref->count++;
+	spin_unlock(&lockref->lock);
+}
+EXPORT_SYMBOL(lockref_get);
+
+/**
+ * lockref_get_not_zero - Increments count unless the count is 0
+ * @lockref: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count was zero
+ */
+int lockref_get_not_zero(struct lockref *lockref)
+{
+	int retval;
+
+	CMPXCHG_LOOP(
+		new.count++;
+		if (!old.count)
+			return 0;
+	,
+		return 1;
+	);
+
+	spin_lock(&lockref->lock);
+	retval = 0;
+	if (lockref->count) {
+		lockref->count++;
+		retval = 1;
+	}
+	spin_unlock(&lockref->lock);
+	return retval;
+}
+EXPORT_SYMBOL(lockref_get_not_zero);
+
+/**
+ * lockref_get_or_lock - Increments count unless the count is 0
+ * @lockref: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count was zero
+ * and we got the lock instead.
+ */
+int lockref_get_or_lock(struct lockref *lockref)
+{
+	CMPXCHG_LOOP(
+		new.count++;
+		if (!old.count)
+			break;
+	,
+		return 1;
+	);
+
+	spin_lock(&lockref->lock);
+	if (!lockref->count)
+		return 0;
+	lockref->count++;
+	spin_unlock(&lockref->lock);
+	return 1;
+}
+EXPORT_SYMBOL(lockref_get_or_lock);
+
+/**
+ * lockref_put_or_lock - decrements count unless count <= 1 before decrement
+ * @lockref: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken
+ */
+int lockref_put_or_lock(struct lockref *lockref)
+{
+	CMPXCHG_LOOP(
+		new.count--;
+		if (old.count <= 1)
+			break;
+	,
+		return 1;
+	);
+
+	spin_lock(&lockref->lock);
+	if (lockref->count <= 1)
+		return 0;
+	lockref->count--;
+	spin_unlock(&lockref->lock);
+	return 1;
+}
+EXPORT_SYMBOL(lockref_put_or_lock);
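
The whole fast path rests on the union in lockref.h: spinlock and count
share one 64-bit word (the BUILD_BUG_ON enforces the size), so a single
cmpxchg can atomically assert "the lock was free and the count was X"
while making it X+1. A standalone userspace model of the loop, assuming
C11 atomics and a zero lock word meaning "unlocked" (the kernel version
instead peeks at the arch spinlock representation via
arch_spin_value_unlocked()):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    union lockref_model {
            uint64_t lock_count;
            struct {
                    uint32_t lock;   /* 0 means unlocked in this model */
                    uint32_t count;
            };
    };

    static bool get_if_unlocked(_Atomic uint64_t *word)
    {
            union lockref_model old = { .lock_count = atomic_load(word) };

            while (old.lock == 0) {
                    union lockref_model new = old;

                    new.count++;
                    /* One cmpxchg covers both halves: it fails, and
                     * reloads 'old', if the lock was taken or the
                     * count moved underneath us. */
                    if (atomic_compare_exchange_weak(word, &old.lock_count,
                                                     new.lock_count))
                            return true;
                    /* a pause here plays the role of cpu_relax() */
            }
            return false;  /* lock is held: caller takes the slow path */
    }
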
diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index acb8650..7d7c9d8 100644
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -172,9 +172,22 @@ else
 fi
 maintainer="$name <$email>"
 
+# Try to determine distribution
+distribution="UNRELEASED"
+if [ -x "$(which lsb_release)" ]; then
+	codename=$(lsb_release --codename --short)
+	if [ "$codename" != "" ]; then
+		distribution=$codename
+	else
+		echo "WARNING: The distribution could NOT be determined!"
+	fi
+else
+	echo "HINT: Install the lsb_release binary; it helps to identify your distribution."
+fi
+
 # Generate a simple changelog template
 cat <<EOF > debian/changelog
-linux-upstream ($packageversion) unstable; urgency=low
+linux-upstream ($packageversion) $distribution; urgency=low
 
   * Custom built Linux kernel.
 
@@ -188,10 +201,10 @@ This is a packacked upstream version of the Linux kernel.
 The sources may be found at most Linux ftp sites, including:
 ftp://ftp.kernel.org/pub/linux/kernel
 
-Copyright: 1991 - 2009 Linus Torvalds and others.
+Copyright: 1991 - 2013 Linus Torvalds and others.
 
 The git repository for mainline kernel development is at:
-git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
+git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by

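With the deb-pkg changes applied, a build via e.g. "fakeroot make deb-pkg"
on a machine with lsb_release installed should stamp debian/changelog with
the detected codename, e.g. "linux-upstream (3.11.0-1) raring; urgency=low"
(codename shown is only an example), and fall back to UNRELEASED when
detection fails.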