Hi, I am trying to deploy Ceph (Hammer) on RHEL 7 using ceph-deploy. While activating the OSD with ceph-deploy from the admin node, the step shown below hangs. I then ran the same command manually on the OSD node and traced it using "python -m trace --trace". It appears to be stuck in some threading.py code. Can someone please help?
[*ceph-vm-osd1*][*WARNIN*] INFO:ceph-disk:Running command: /usr/bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring osd create --concise c2d19639-f3ec-447d-9a7c-a180a226dded [*ceph-vm-osd1*][*WARNIN*] No data was received after 300 seconds, disconnecting... *Manual run with --verbose option:* [cloud-user@ceph-vm-osd1 ~]$ sudo /usr/bin/ceph --verbose --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring osd create c2d19639-f3ec-447d-9a7c-a180a226dded parsed_args: Namespace(admin_socket=None, admin_socket_nope=None, cephconf=None, client_id=None, client_name='client.bootstrap-osd', cluster='ceph', cluster_timeout=None, completion=False, help=False, input_file=None, output_file=None, output_format=None, status=False, verbose=True, version=False, watch=False, watch_debug=False, watch_error=False, watch_info=False, watch_sec=False, watch_warn=False), childargs: ['--keyring', '/var/lib/ceph/bootstrap-osd/ceph.keyring', 'osd', 'create', 'c2d19639-f3ec-447d-9a7c-a180a226dded'] ^CError connecting to cluster: InterruptedOrTimeoutError *Manual run with python -m trace --trace :* <<truncated>> --- modulename: threading, funcname: _note threading.py(64): if self.__verbose: threading.py(946): self.__block.acquire() threading.py(947): try: threading.py(948): if timeout is None: threading.py(954): deadline = _time() + timeout threading.py(955): while not self.__stopped: threading.py(956): delay = deadline - _time() threading.py(957): if delay <= 0: threading.py(961): self.__block.wait(delay, balancing) --- modulename: threading, funcname: wait threading.py(331): if not self._is_owned(): --- modulename: threading, funcname: _is_owned threading.py(302): if self.__lock.acquire(0): threading.py(306): return True threading.py(333): waiter = _allocate_lock() threading.py(334): waiter.acquire() threading.py(335): self.__waiters.append(waiter) threading.py(336): saved_state = self._release_save() 
--- modulename: threading, funcname: _release_save threading.py(294): self.__lock.release() # No state to save threading.py(337): try: # restore state no matter what (e.g., KeyboardInterrupt) threading.py(338): if timeout is None: threading.py(348): endtime = _time() + timeout threading.py(349): delay = 0.0005 # 500 us -> initial delay of 1 ms threading.py(350): while True: threading.py(351): gotit = waiter.acquire(0) threading.py(352): if gotit: threading.py(354): remaining = endtime - _time() threading.py(355): if remaining <= 0: threading.py(357): if balancing: threading.py(358): delay = min(delay * 2, remaining, 0.05) threading.py(361): _sleep(delay) --- modulename: threading, funcname: _note threading.py(64): if self.__verbose: --- modulename: threading, funcname: __stop threading.py(870): if not hasattr(self, '_Thread__block'): threading.py(872): self.__block.acquire() threading.py(873): self.__stopped = True threading.py(874): self.__block.notify_all() --- modulename: threading, funcname: notifyAll threading.py(409): self.notify(len(self.__waiters)) --- modulename: threading, funcname: notify threading.py(385): if not self._is_owned(): --- modulename: threading, funcname: _is_owned threading.py(302): if self.__lock.acquire(0): threading.py(306): return True threading.py(387): __waiters = self.__waiters threading.py(388): waiters = __waiters[:n] threading.py(389): if not waiters: threading.py(393): self._note("%s.notify(): notifying %d waiter%s", self, n, threading.py(394): n!=1 and "s" or "") --- modulename: threading, funcname: _note threading.py(64): if self.__verbose: threading.py(395): for waiter in waiters: threading.py(396): waiter.release() threading.py(397): try: threading.py(398): __waiters.remove(waiter) threading.py(395): for waiter in waiters: threading.py(875): self.__block.release() threading.py(350): while True: threading.py(351): gotit = waiter.acquire(0) threading.py(352): if gotit: threading.py(353): break threading.py(362): if not gotit: 
threading.py(371): self._note("%s.wait(%s): got it", self, timeout) --- modulename: threading, funcname: _note threading.py(64): if self.__verbose: threading.py(373): self._acquire_restore(saved_state) --- modulename: threading, funcname: _acquire_restore threading.py(297): self.__lock.acquire() # Ignore saved state threading.py(955): while not self.__stopped: threading.py(964): self._note("%s.join(): thread stopped", self) --- modulename: threading, funcname: _note threading.py(64): if self.__verbose: threading.py(966): self.__block.release() rados.py(178): if timeout and t.is_alive(): rados.py(176): while t.is_alive(): --- modulename: threading, funcname: isAlive threading.py(1004): assert self.__initialized, "Thread.__init__() not called" threading.py(1005): return self.__started.is_set() and not self.__stopped --- modulename: threading, funcname: isSet threading.py(572): return self.__flag rados.py(183): t.join() # in case t exits before reaching the join() above --- modulename: threading, funcname: join threading.py(936): if not self.__initialized: threading.py(938): if not self.__started.is_set(): --- modulename: threading, funcname: isSet threading.py(572): return self.__flag threading.py(940): if self is current_thread(): --- modulename: threading, funcname: currentThread threading.py(1160): try: threading.py(1161): return _active[_get_ident()] threading.py(944): if not self.__stopped: threading.py(946): self.__block.acquire() threading.py(947): try: threading.py(948): if timeout is None: threading.py(949): while not self.__stopped: threading.py(952): self._note("%s.join(): thread stopped", self) --- modulename: threading, funcname: _note threading.py(64): if self.__verbose: threading.py(966): self.__block.release() rados.py(193): if interrupt: rados.py(195): return t.retval rados.py(265): self.state = "shutdown" ceph(916): sys.exit(retval) --- modulename: trace, funcname: _unsettrace Thanks, Pavana
_______________________________________________ ceph-users mailing list ceph-users@lists.ceph.com http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com