Hi Richard,

The posix fallback for libatomic locks on unsupported item sizes (using 
pthreads) might be reliable, but is (not surprisingly) somewhat slow.

Although the built-in testsuite from libatomic passes, every Darwin platform 
from powerpc-darwin9 (on a G5) through x86-64-darwin14 (on Haswell) fails the 
gcc/exceptions.exp suite with timeouts.

So here's a platform port
(we don't [yet] have ifuncs).

----------------

The patch does the following:

1) When 4byte atomic operations are supported (will always be the case when the 
lib is built as part of GCC) these are used to guard accesses.

2) When atomic ops are not available, OSSpinLocks are used (available since 
forever on the OS).  These are also 4byte locks on all current platforms.

3) Some heuristic fiddling with the algorithm for hashing addresses - to try 
and avoid cache turbulence when items are close-ish together (not unlikely in 
typical code).  This might yet bear some more tweaking, but seems OK enough for 
a first go.

4) We use low-level Mach interfaces to give up our timeslice when blocked, to 
keep overheads to a minimum.

5) We allow the port to specify the minimum processor that will be available.
   E.g. x86 Darwin defaults to core2, which means that we don't start 
guarding small items which we could lock natively.

================

I have been testing the patch for a while on darwin9..13 (and Dominique has 
tested it on darwin14).
I jammed the atomic version off to test the Spinlock-based timings.

Typical runtime results  for gcc: atomic.exp 

[all == timeout with trunk implementation]

Port Version            Spinlock                Atomic
=======================================================
powerpc-darwin9         ~30mins                 ~15mins 2G5 G5
x86-64-darwin{12,13}    ~15mins                 ~6mins  2G8 Xeon, 2G6 Ivy bridge
x86-64-darwin14         -------                 ~3mins  (Haswell, AFAIK)

This (for x86-64, Xeon) is about the same as I see on our Linux machines.

OK for trunk?

This is also functional on gcc-4.8 and gcc-4.9...
.. what would the feeling be about back-porting?

Iain

libatomic:

        * config/darwin/host-config.h: New.
        * config/darwin/lock.c: New.
        * configure.tgt (DEFAULT_X86_CPU): New.
        (target): New entry for darwin.

From fdbf91b9fb20992231a370f0e5cd803085b4f69e Mon Sep 17 00:00:00 2001
From: Iain Sandoe <i...@codesourcery.com>
Date: Wed, 15 Oct 2014 10:49:40 +0100
Subject: [PATCH] Initial draft of a Darwin port for libatomic

---
 libatomic/config/darwin/host-config.h |  55 ++++++++++
 libatomic/config/darwin/lock.c        | 187 ++++++++++++++++++++++++++++++++++
 libatomic/configure.tgt               |  21 +++-
 3 files changed, 260 insertions(+), 3 deletions(-)
 create mode 100644 libatomic/config/darwin/host-config.h
 create mode 100644 libatomic/config/darwin/lock.c

diff --git a/libatomic/config/darwin/host-config.h b/libatomic/config/darwin/host-config.h
new file mode 100644
index 0000000..db55d34
--- /dev/null
+++ b/libatomic/config/darwin/host-config.h
@@ -0,0 +1,55 @@
+/* Copyright (C) 2012-2014 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <r...@redhat.com>.
+
+   This file is part of the GNU Atomic Library (libatomic).
+
+   Libatomic is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libatomic is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Included after all more target-specific host-config.h.  */
+
+
+#ifndef protect_start_end
+# ifdef HAVE_ATTRIBUTE_VISIBILITY
+#  pragma GCC visibility push(hidden)
+# endif
+
+void libat_lock_1 (void *ptr);
+void libat_unlock_1 (void *ptr);
+
+static inline UWORD
+protect_start (void *ptr)  /* Enter the critical section guarding PTR.  */
+{
+  libat_lock_1 (ptr);
+  return 0;  /* Nothing to carry over: protect_end ignores its second arg.  */
+}
+
+static inline void
+protect_end (void *ptr, UWORD dummy UNUSED)  /* Undo protect_start (PTR).  */
+{
+  libat_unlock_1 (ptr);
+}
+
+# define protect_start_end 1
+# ifdef HAVE_ATTRIBUTE_VISIBILITY
+#  pragma GCC visibility pop
+# endif
+#endif /* protect_start_end */
+
+#include_next <host-config.h>
diff --git a/libatomic/config/darwin/lock.c b/libatomic/config/darwin/lock.c
new file mode 100644
index 0000000..286b9df
--- /dev/null
+++ b/libatomic/config/darwin/lock.c
@@ -0,0 +1,187 @@
+/* Copyright (C) 2014 Free Software Foundation, Inc.
+   Contributed by Iain Sandoe <i...@codesourcery.com>.
+
+   This file is part of the GNU Atomic Library (libatomic).
+
+   Libatomic is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libatomic is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdint.h>
+#include "libatomic_i.h"
+
+/* For items that must be guarded by a lock, we use the following strategy:
+   If atomic support is available for a uint32_t we use that.
+   If not we use the Darwin OSSpinLock implementation. */
+
+/* The target page size.  Must be no larger than the runtime page size,
+   lest locking fail with virtual address aliasing (i.e. a page mmaped
+   at two locations).  */
+#ifndef PAGE_SIZE
+#  define PAGE_SIZE 4096
+#endif
+
+/* The target cacheline size.  */
+#ifndef CACHLINE_SIZE
+#  define CACHLINE_SIZE 64
+#endif
+
+/* The granularity at which locks are applied when n > CACHLINE_SIZE.
+   We follow the posix pthreads implementation here.  */
+#ifndef WATCH_SIZE
+#  define WATCH_SIZE   CACHLINE_SIZE
+#endif
+
+#if HAVE_ATOMIC_EXCHANGE_4 && HAVE_ATOMIC_LDST_4
+
+#  include <stdatomic.h>
+#  include <mach/mach_traps.h>
+#  include <mach/thread_switch.h>
+
+#  ifndef USE_ATOMIC
+#    define USE_ATOMIC 1
+#  endif
+
+inline static void LockUnlock(uint32_t *l) { /* Release: mark *L free.  */
+  __atomic_store_4((_Atomic(uint32_t)*)l, 0, __ATOMIC_RELEASE);
+}
+
+/* NSPINS is the number of tries we will make to acquire the lock before
+   giving up our time-slice (on the basis that we are guarding small
+   sections of code here and, therefore, if we don't acquire the lock
+   quickly, that implies that the current holder is not active).  */
+#  define NSPINS 4
+inline static void LockLock(uint32_t *l) { /* Acquire: CAS *L from 0 to 1.  */
+  uint32_t old = 0;
+  uint32_t n = NSPINS;
+  while (!__atomic_compare_exchange_4((_Atomic(uint32_t)*)l, &old,
+        1, true, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
+     old = 0;  /* A failed CAS overwrote OLD; re-arm the expected value.  */
+    if (--n == 0) {
+      /* Give up this time-slice, with no hint to the scheduler about what to
+         pick.  TODO: maybe see if it's worth preserving some info about the
+         presence of waiting threads - to allow a similar "give up"
+         time-slice scheme on the unlock end.  */
+      thread_switch((mach_port_name_t)0, SWITCH_OPTION_NONE,
+                   MACH_MSG_TIMEOUT_NONE);
+      n = NSPINS;
+    }
+  }
+}
+
+#  define LOCK_SIZE sizeof(uint32_t)
+#  define NLOCKS (PAGE_SIZE / LOCK_SIZE)
+static uint32_t locks[NLOCKS];
+
+#else
+
+#  include <libkern/OSAtomic.h>
+
+#  ifndef USE_ATOMIC
+#    define USE_ATOMIC 0
+#  endif
+
+#  define LOCK_SIZE sizeof(OSSpinLock)
+#  define NLOCKS               (PAGE_SIZE / LOCK_SIZE)
+static OSSpinLock locks[NLOCKS];
+
+#endif
+
+/* Hash PTR (assumed aligned to its size N) to a lock index: drop the low
+   floor(log2 N) address bits (7 if N exceeds a cache line; 0 if N is 0), then
+   byte-swap to try to keep adjacent objects off the same lock cache line.  */
+static inline uintptr_t
+addr_hash (void *ptr, size_t n)
+{
+  if (n <= CACHLINE_SIZE)  /* "| 1" keeps clz defined for N == 0.  */
+    n = sizeof(unsigned int)*8 - __builtin_clz((unsigned int) n | 1) -1;
+  else
+    n = 7;
+
+  uint16_t x = (((uintptr_t)ptr) >> n);
+  x ^= n;
+  x = ((x >> 8) & 0xff) | ((x << 8) & 0xff00);
+  return x % NLOCKS;
+}
+
+void
+libat_lock_1 (void *ptr)  /* Lock the slot PTR hashes to (as size 1).  */
+{
+#if USE_ATOMIC
+  LockLock (&locks[addr_hash (ptr, 1)]);
+#else
+  OSSpinLockLock(&locks[addr_hash (ptr, 1)]);
+#endif
+}
+
+void
+libat_unlock_1 (void *ptr)  /* Release the slot taken by libat_lock_1.  */
+{
+#if USE_ATOMIC
+  LockUnlock (&locks[addr_hash (ptr, 1)]);
+#else
+  OSSpinLockUnlock (&locks[addr_hash (ptr, 1)]);
+#endif
+}
+
+void
+libat_lock_n (void *ptr, size_t n)  /* Lock the N-byte object at PTR.  */
+{
+  uintptr_t h = addr_hash (ptr, n);
+
+  /* Cap the span: at most PAGE_SIZE / WATCH_SIZE locks are ever taken.  */
+  if (n > PAGE_SIZE)
+    n = PAGE_SIZE;
+
+  size_t i = 0;
+  do  /* Takes at least one lock, then one more per WATCH_SIZE bytes.  */
+    {
+#if USE_ATOMIC
+      LockLock (&locks[h]);
+#else
+      OSSpinLockLock(&locks[h]);
+#endif
+      if (++h == NLOCKS)  /* Wrap around the end of the lock table.  */
+       h = 0;
+      i += WATCH_SIZE;
+    }
+  while (i < n);
+}
+
+void
+libat_unlock_n (void *ptr, size_t n)  /* Undo libat_lock_n (PTR, N).  */
+{
+  uintptr_t h = addr_hash (ptr, n);
+
+  if (n > PAGE_SIZE)  /* Same clamp as libat_lock_n, so the same slots.  */
+    n = PAGE_SIZE;
+
+  size_t i = 0;
+  do  /* Releases in the same ascending order the locks were taken in.  */
+    {
+#if USE_ATOMIC
+      LockUnlock (&locks[h]);
+#else
+      OSSpinLockUnlock (&locks[h]);
+#endif
+      if (++h == NLOCKS)  /* Wrap around the end of the lock table.  */
+       h = 0;
+      i += WATCH_SIZE;
+    }
+  while (i < n);
+}
diff --git a/libatomic/configure.tgt b/libatomic/configure.tgt
index b0344d5..8283012 100644
--- a/libatomic/configure.tgt
+++ b/libatomic/configure.tgt
@@ -26,6 +26,16 @@
 # Map the target cpu to an ARCH sub-directory.  At the same time,
 # work out any special compilation flags as necessary.
 
+case "${target}" in  # Pick the x86 -march used when no --with-arch is given.
+  *-*-darwin*)
+    # Use the same default -march as GCC does for x86 Darwin.
+    DEFAULT_X86_CPU=core2
+    ;;
+  *)
+    DEFAULT_X86_CPU=i486  # The -march value this script used to hard-code.
+    ;;
+esac
+
 case "${target_cpu}" in
   alpha*)
        # fenv.c needs this option to generate inexact exceptions.
@@ -67,7 +77,7 @@ case "${target_cpu}" in
            ;;
          *)
            if test -z "$with_arch"; then
-             XCFLAGS="${XCFLAGS} -march=i486 -mtune=${target_cpu}"
+             XCFLAGS="${XCFLAGS} -march=$DEFAULT_X86_CPU -mtune=${target_cpu}"
              XCFLAGS="${XCFLAGS} -fomit-frame-pointer"
            fi
        esac
@@ -78,7 +88,7 @@ case "${target_cpu}" in
   x86_64)
        case " ${CC} ${CFLAGS} " in
          *" -m32 "*)
-           XCFLAGS="${XCFLAGS} -march=i486 -mtune=generic"
+           XCFLAGS="${XCFLAGS} -march=$DEFAULT_X86_CPU -mtune=generic"
            XCFLAGS="${XCFLAGS} -fomit-frame-pointer"
            ;;
          *)
@@ -107,11 +117,16 @@ case "${target}" in
   *-*-linux* | *-*-gnu* | *-*-k*bsd*-gnu \
   | *-*-netbsd* | *-*-freebsd* | *-*-openbsd* \
   | *-*-solaris2* | *-*-sysv4* | *-*-irix6* | *-*-osf* | *-*-hpux11* \
-  | *-*-darwin* | *-*-aix* | *-*-cygwin*)
+  | *-*-aix* | *-*-cygwin*)
        # POSIX system.  The OS is supported.
        config_path="${config_path} posix"
        ;;
 
+  *-*-darwin*)
+       # Darwin system.  The OS is supported.
+       config_path="${config_path} darwin"
+       ;;
+
   *-*-mingw*)
        # OS support for atomic primitives.
         case ${target_thread_file} in
-- 
1.8.4.2


Reply via email to