Hello, We were occasionally experiencing client exceptions with 0.6.3, so we upgraded to 0.7.0 a couple of weeks ago, but unfortunately we now get more client exceptions, and more frequently. Also, occasionally nodetool ring will show a node as Down even though Cassandra is still running, and the node will be up again shortly. We run nodetool ring every half hour or so for monitoring; otherwise we probably would not have noticed.
I'm trying to determine whether we are hitting some bugs, just don't have enough hardware for our application, or have made some error in configuration. I would be happy to provide any more information or run tests to narrow down the problem. Below are some exceptions we see several distinct times every day: 1) File "/usr/local/lib/python2.6/dist-packages/pycassa-1.0.4-py2.6.egg/pycassa/columnfamily.py", line 430, in get self._rcl(read_consistency_level)) File "/usr/local/lib/python2.6/dist-packages/pycassa-1.0.4-py2.6.egg/pycassa/pool.py", line 397, in new_f result = getattr(super(ConnectionWrapper, self), f.__name__)(*args, **kwargs) File "/usr/local/lib/python2.6/dist-packages/pycassa-1.0.4-py2.6.egg/pycassa/cassandra/Cassandra.py", line 432, in get_slice return self.recv_get_slice() File "/usr/local/lib/python2.6/dist-packages/pycassa-1.0.4-py2.6.egg/pycassa/cassandra/Cassandra.py", line 446, in recv_get_slice (fname, mtype, rseqid) = self._iprot.readMessageBegin() File "/usr/local/lib/python2.6/dist-packages/Thrift-0.5.0-py2.6-linux-i686.egg/thrift/protocol/TBinaryProtocol.py", line 126, in readMessageBegin sz = self.readI32() File "/usr/local/lib/python2.6/dist-packages/Thrift-0.5.0-py2.6-linux-i686.egg/thrift/protocol/TBinaryProtocol.py", line 203, in readI32 buff = self.trans.readAll(4) File "/usr/local/lib/python2.6/dist-packages/Thrift-0.5.0-py2.6-linux-i686.egg/thrift/transport/TTransport.py", line 63, in readAll raise EOFError() EOFError 2) File "/usr/local/lib/python2.6/dist-packages/pycassa-1.0.4-py2.6.egg/pycassa/columnfamily.py", line 710, in insert res = self._tlocal.client.insert(key, cp, column, self._wcl(write_consistency_level)) File "/usr/local/lib/python2.6/dist-packages/pycassa-1.0.4-py2.6.egg/pycassa/pool.py", line 421, in new_f return new_f(self, *args, **kwargs) File "/usr/local/lib/python2.6/dist-packages/pycassa-1.0.4-py2.6.egg/pycassa/pool.py", line 421, in new_f return new_f(self, *args, **kwargs) File 
"/usr/local/lib/python2.6/dist-packages/pycassa-1.0.4-py2.6.egg/pycassa/pool.py", line 421, in new_f return new_f(self, *args, **kwargs) File "/usr/local/lib/python2.6/dist-packages/pycassa-1.0.4-py2.6.egg/pycassa/pool.py", line 406, in new_f raise MaximumRetryException('Retried %d times' % self._retry_count) MaximumRetryException: Retried 4 times 3) return ColumnFamily(client, bucket_name) File "/usr/local/lib/python2.6/dist-packages/pycassa-1.0.4-py2.6.egg/pycassa/columnfamily.py", line 122, in __init__ self._obtain_connection() File "/usr/local/lib/python2.6/dist-packages/pycassa-1.0.4-py2.6.egg/pycassa/columnfamily.py", line 359, in _obtain_connection self._tlocal.client = self.pool.get() File "/usr/local/lib/python2.6/dist-packages/pycassa-1.0.4-py2.6.egg/pycassa/pool.py", line 724, in get conn = self._create_connection() File "/usr/local/lib/python2.6/dist-packages/pycassa-1.0.4-py2.6.egg/pycassa/pool.py", line 122, in _create_connection wrapper = self._get_new_wrapper(server) File "/usr/local/lib/python2.6/dist-packages/pycassa-1.0.4-py2.6.egg/pycassa/pool.py", line 640, in _get_new_wrapper credentials=self.credentials) File "/usr/local/lib/python2.6/dist-packages/pycassa-1.0.4-py2.6.egg/pycassa/pool.py", line 317, in __init__ super(ConnectionWrapper, self).__init__(*args, **kwargs) File "/usr/local/lib/python2.6/dist-packages/pycassa-1.0.4-py2.6.egg/pycassa/connection.py", line 38, in __init__ server_api_version = int(self.describe_version().split('.', 1)[0]) File "/usr/local/lib/python2.6/dist-packages/pycassa-1.0.4-py2.6.egg/pycassa/cassandra/Cassandra.py", line 947, in describe_version return self.recv_describe_version() File "/usr/local/lib/python2.6/dist-packages/pycassa-1.0.4-py2.6.egg/pycassa/cassandra/Cassandra.py", line 957, in recv_describe_version (fname, mtype, rseqid) = self._iprot.readMessageBegin() File "/usr/local/lib/python2.6/dist-packages/Thrift-0.5.0-py2.6-linux-i686.egg/thrift/protocol/TBinaryProtocol.py", line 126, in readMessageBegin 
sz = self.readI32() File "/usr/local/lib/python2.6/dist-packages/Thrift-0.5.0-py2.6-linux-i686.egg/thrift/protocol/TBinaryProtocol.py", line 203, in readI32 buff = self.trans.readAll(4) File "/usr/local/lib/python2.6/dist-packages/Thrift-0.5.0-py2.6-linux-i686.egg/thrift/transport/TTransport.py", line 58, in readAll chunk = self.read(sz-have) File "/usr/local/lib/python2.6/dist-packages/Thrift-0.5.0-py2.6-linux-i686.egg/thrift/transport/TTransport.py", line 272, in read self.readFrame() File "/usr/local/lib/python2.6/dist-packages/Thrift-0.5.0-py2.6-linux-i686.egg/thrift/transport/TTransport.py", line 276, in readFrame buff = self.__trans.readAll(4) File "/usr/local/lib/python2.6/dist-packages/Thrift-0.5.0-py2.6-linux-i686.egg/thrift/transport/TTransport.py", line 58, in readAll chunk = self.read(sz-have) File "/usr/local/lib/python2.6/dist-packages/Thrift-0.5.0-py2.6-linux-i686.egg/thrift/transport/TSocket.py", line 94, in read buff = self.handle.recv(sz) error: [Errno 104] Connection reset by peer Thanks in advance for any insight, Andy