On Aug 5, 2014, at 12:16 PM, Brian Geffon <briangef...@gmail.com> wrote:
> Technically you'd probably aggregate all of these into a single variable > for production monitoring, but having insight into the individual failure > scenarios could prove to be very valuable. I'm still very dubious about these stats. SSL_ERROR_WANT_WRITE, SSL_ERROR_WANT_READ, and SSL_ERROR_WANT_X509_LOOKUP boil down to rescheduling the I/O. Hard to see how a metric for that is useful. The other things here end up as an error or EOF. Again, hard to see how this is helpful as a metric. Handshake errors (client and server) seems like they could be interesting to monitor, but these are bundled in "ssl_error_ssl", so you'll really have no idea what is happening. "ssl_sni_name_set_failure" would be an application error, not a metric. It would only happen when setting a hostname of > 255 bytes. If that happens, an error log is much more helpful. I'm +1 on stats, but these could be much more useful. > On Tue, Aug 5, 2014 at 12:11 PM, James Peach <jamespe...@me.com> wrote: > >> What is the use case for these stats? I can see how the logs might be too >> noisy, but they don't seem like good candidates for metrics. How would use >> these for monitoring? >> >>> On Aug 5, 2014, at 11:34 AM, bri...@apache.org wrote: >>> >>> Repository: trafficserver >>> Updated Branches: >>> refs/heads/master b4343175e -> d9aba01de >>> >>> >>> TS-2986: Adding stats to TLS errors >>> >>> >>> Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo >>> Commit: >> http://git-wip-us.apache.org/repos/asf/trafficserver/commit/a8070bbb >>> Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/a8070bbb >>> Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/a8070bbb >>> >>> Branch: refs/heads/master >>> Commit: a8070bbb80f2e709f91d4c8b47d9ded4d55bdcdc >>> Parents: b434317 >>> Author: Brian Geffon <bri...@apache.org> >>> Authored: Tue Aug 5 11:34:07 2014 -0700 >>> Committer: Brian Geffon <bri...@apache.org> >>> Committed: Tue Aug 5 11:34:07 2014 -0700 >>> >>> ---------------------------------------------------------------------- >>> iocore/net/P_SSLUtils.h | 10 ++++++ >>> iocore/net/SSLNetVConnection.cc | 60 +++++++++++++++++++++++++----------- >>> iocore/net/SSLUtils.cc | 28 +++++++++++++++++ >>> 3 files changed, 80 insertions(+), 18 deletions(-) >>> ---------------------------------------------------------------------- >>> >>> >>> >> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/a8070bbb/iocore/net/P_SSLUtils.h >>> ---------------------------------------------------------------------- >>> diff --git a/iocore/net/P_SSLUtils.h b/iocore/net/P_SSLUtils.h >>> index b1bf01c..6e44be3 100644 >>> --- a/iocore/net/P_SSLUtils.h >>> +++ b/iocore/net/P_SSLUtils.h >>> @@ -69,6 +69,16 @@ enum SSL_Stats >>> ssl_total_tickets_not_found_stat, >>> ssl_total_tickets_renewed_stat, >>> >>> + /* error stats */ >>> + ssl_error_want_write, >>> + ssl_error_want_read, >>> + ssl_error_want_x509_lookup, >>> + ssl_error_syscall, >>> + ssl_error_read_eos, >>> + ssl_error_zero_return, >>> + ssl_error_ssl, >>> + ssl_sni_name_set_failure, >>> + >>> ssl_cipher_stats_start = 100, >>> ssl_cipher_stats_end = 300, >>> >>> >>> >> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/a8070bbb/iocore/net/SSLNetVConnection.cc >>> ---------------------------------------------------------------------- >>> diff --git a/iocore/net/SSLNetVConnection.cc >> b/iocore/net/SSLNetVConnection.cc >>> index 0f4a6b3..d3aa858 100644 >>> --- a/iocore/net/SSLNetVConnection.cc >>> +++ b/iocore/net/SSLNetVConnection.cc >>> @@ -140,22 +140,26 @@ ssl_read_from_net(SSLNetVConnection * sslvc, >> EThread * lthread, int64_t &ret) >>> >>> case SSL_ERROR_WANT_WRITE: >>> event = SSL_WRITE_WOULD_BLOCK; >>> - Debug("ssl", "[SSL_NetVConnection::ssl_read_from_net] >> SSL_ERROR_WOULD_BLOCK(write)"); >>> + SSL_INCREMENT_DYN_STAT(ssl_error_want_write); >>> + Debug("ssl.error", "[SSL_NetVConnection::ssl_read_from_net] >> SSL_ERROR_WOULD_BLOCK(write)"); >>> break; >>> case SSL_ERROR_WANT_READ: >>> event = SSL_READ_WOULD_BLOCK; >>> - Debug("ssl", "[SSL_NetVConnection::ssl_read_from_net] >> SSL_ERROR_WOULD_BLOCK(read)"); >>> + SSL_INCREMENT_DYN_STAT(ssl_error_want_read); >>> + Debug("ssl.error", "[SSL_NetVConnection::ssl_read_from_net] >> SSL_ERROR_WOULD_BLOCK(read)"); >>> break; >>> case SSL_ERROR_WANT_X509_LOOKUP: >>> event = SSL_READ_WOULD_BLOCK; >>> - Debug("ssl", "[SSL_NetVConnection::ssl_read_from_net] >> SSL_ERROR_WOULD_BLOCK(read/x509 lookup)"); >>> + SSL_INCREMENT_DYN_STAT(ssl_error_want_x509_lookup); >>> + Debug("ssl.error", "[SSL_NetVConnection::ssl_read_from_net] >> SSL_ERROR_WOULD_BLOCK(read/x509 lookup)"); >>> break; >>> case SSL_ERROR_SYSCALL: >>> + SSL_INCREMENT_DYN_STAT(ssl_error_syscall); >>> if (rres != 0) { >>> // not EOF >>> event = SSL_READ_ERROR; >>> ret = errno; >>> - Debug("ssl", "[SSL_NetVConnection::ssl_read_from_net] >> SSL_ERROR_SYSCALL, underlying IO error: %s", strerror(errno)); >>> + Debug("ssl.error", "[SSL_NetVConnection::ssl_read_from_net] >> SSL_ERROR_SYSCALL, underlying IO error: %s", strerror(errno)); >>> } else { >>> // then EOF observed, treat it as EOS >>> event = SSL_READ_EOS; >>> @@ -164,13 +168,15 @@ ssl_read_from_net(SSLNetVConnection * sslvc, >> EThread * lthread, int64_t &ret) >>> break; >>> case SSL_ERROR_ZERO_RETURN: >>> event = SSL_READ_EOS; >>> - Debug("ssl", "[SSL_NetVConnection::ssl_read_from_net] >> SSL_ERROR_ZERO_RETURN"); >>> + SSL_INCREMENT_DYN_STAT(ssl_error_zero_return); >>> + Debug("ssl.error", "[SSL_NetVConnection::ssl_read_from_net] >> SSL_ERROR_ZERO_RETURN"); >>> break; >>> case SSL_ERROR_SSL: >>> default: >>> event = SSL_READ_ERROR; >>> ret = errno; >>> - SSLErrorVC(sslvc, "[SSL_NetVConnection::ssl_read_from_net]"); >>> + SSL_INCREMENT_DYN_STAT(ssl_error_ssl); >>> + Debug("ssl.error", "[SSL_NetVConnection::ssl_read_from_net]"); >>> break; >>> } // switch >>> break; >>> @@ -432,28 +438,37 @@ SSLNetVConnection::load_buffer_and_write(int64_t >> towrite, int64_t &wattempted, i >>> case SSL_ERROR_WANT_READ: >>> needs |= EVENTIO_READ; >>> r = -EAGAIN; >>> - Debug("ssl", "SSL_write-SSL_ERROR_WANT_READ"); >>> + SSL_INCREMENT_DYN_STAT(ssl_error_want_read); >>> + Debug("ssl.error", "SSL_write-SSL_ERROR_WANT_READ"); >>> break; >>> case SSL_ERROR_WANT_WRITE: >>> - case SSL_ERROR_WANT_X509_LOOKUP: >>> + case SSL_ERROR_WANT_X509_LOOKUP: { >>> + if (SSL_ERROR_WANT_WRITE == err) >>> + SSL_INCREMENT_DYN_STAT(ssl_error_want_write); >>> + else if (SSL_ERROR_WANT_X509_LOOKUP == err) >>> + SSL_INCREMENT_DYN_STAT(ssl_error_want_x509_lookup); >>> + >>> needs |= EVENTIO_WRITE; >>> r = -EAGAIN; >>> - Debug("ssl", "SSL_write-SSL_ERROR_WANT_WRITE"); >>> + Debug("ssl.error", "SSL_write-SSL_ERROR_WANT_WRITE"); >>> break; >>> + } >>> case SSL_ERROR_SYSCALL: >>> r = -errno; >>> - Debug("ssl", "SSL_write-SSL_ERROR_SYSCALL"); >>> + SSL_INCREMENT_DYN_STAT(ssl_error_syscall); >>> + Debug("ssl.error", "SSL_write-SSL_ERROR_SYSCALL"); >>> break; >>> // end of stream >>> case SSL_ERROR_ZERO_RETURN: >>> r = -errno; >>> - Debug("ssl", "SSL_write-SSL_ERROR_ZERO_RETURN"); >>> + SSL_INCREMENT_DYN_STAT(ssl_error_zero_return); >>> + Debug("ssl.error", "SSL_write-SSL_ERROR_ZERO_RETURN"); >>> break; >>> case SSL_ERROR_SSL: >>> default: >>> r = -errno; >>> - Debug("ssl", "SSL_write-SSL_ERROR_SSL"); >>> - SSLErrorVC(this, "SSL_write"); >>> + SSL_INCREMENT_DYN_STAT(ssl_error_ssl); >>> + Debug("ssl.error", "SSL_write-SSL_ERROR_SSL"); >>> break; >>> } >>> return (r); >>> @@ -653,7 +668,8 @@ SSLNetVConnection::sslClientHandShakeEvent(int &err) >>> if (SSL_set_tlsext_host_name(ssl, options.sni_servername)) { >>> Debug("ssl", "using SNI name '%s' for client handshake", >> options.sni_servername); >>> } else { >>> - SSLError("failed to set SNI name '%s' for client handshake", >> options.sni_servername); >>> + Debug("ssl.error","failed to set SNI name '%s' for client >> handshake", options.sni_servername); >>> + SSL_INCREMENT_DYN_STAT(ssl_sni_name_set_failure); >>> } >>> } >>> #endif >>> @@ -679,13 +695,18 @@ SSLNetVConnection::sslClientHandShakeEvent(int >> &err) >>> return EVENT_DONE; >>> >>> case SSL_ERROR_WANT_WRITE: >>> + Debug("ssl.error", "SSLNetVConnection::sslClientHandShakeEvent, >> SSL_ERROR_WANT_WRITE"); >>> + SSL_INCREMENT_DYN_STAT(ssl_error_want_write); >>> return SSL_HANDSHAKE_WANT_WRITE; >>> >>> case SSL_ERROR_WANT_READ: >>> + SSL_INCREMENT_DYN_STAT(ssl_error_want_read); >>> + Debug("ssl.error", "SSLNetVConnection::sslClientHandShakeEvent, >> SSL_ERROR_WANT_READ"); >>> return SSL_HANDSHAKE_WANT_READ; >>> >>> case SSL_ERROR_WANT_X509_LOOKUP: >>> - Debug("ssl", "SSLNetVConnection::sslClientHandShakeEvent, would >> block on read or write"); >>> + SSL_INCREMENT_DYN_STAT(ssl_error_want_x509_lookup); >>> + Debug("ssl.error", "SSLNetVConnection::sslClientHandShakeEvent, >> SSL_ERROR_WANT_X509_LOOKUP"); >>> break; >>> >>> case SSL_ERROR_WANT_ACCEPT: >>> @@ -695,12 +716,14 @@ SSLNetVConnection::sslClientHandShakeEvent(int >> &err) >>> break; >>> >>> case SSL_ERROR_ZERO_RETURN: >>> - Debug("ssl", "SSLNetVConnection::sslClientHandShakeEvent, EOS"); >>> + SSL_INCREMENT_DYN_STAT(ssl_error_zero_return); >>> + Debug("ssl.error", "SSLNetVConnection::sslClientHandShakeEvent, >> EOS"); >>> return EVENT_ERROR; >>> >>> case SSL_ERROR_SYSCALL: >>> err = errno; >>> - Debug("ssl", "SSLNetVConnection::sslClientHandShakeEvent, syscall"); >>> + SSL_INCREMENT_DYN_STAT(ssl_error_syscall); >>> + Debug("ssl.error", "SSLNetVConnection::sslClientHandShakeEvent, >> syscall"); >>> return EVENT_ERROR; >>> break; >>> >>> @@ -708,7 +731,8 @@ SSLNetVConnection::sslClientHandShakeEvent(int &err) >>> case SSL_ERROR_SSL: >>> default: >>> err = errno; >>> - SSLErrorVC(this, "sslClientHandShakeEvent"); >>> + SSL_INCREMENT_DYN_STAT(ssl_error_ssl); >>> + Debug("ssl.error", "SSLNetVConnection::sslClientHandShakeEvent, >> SSL_ERROR_SSL"); >>> return EVENT_ERROR; >>> break; >>> >>> >>> >> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/a8070bbb/iocore/net/SSLUtils.cc >>> ---------------------------------------------------------------------- >>> diff --git a/iocore/net/SSLUtils.cc b/iocore/net/SSLUtils.cc >>> index 1576c6d..0d85a49 100644 >>> --- a/iocore/net/SSLUtils.cc >>> +++ b/iocore/net/SSLUtils.cc >>> @@ -664,6 +664,34 @@ SSLInitializeStatistics() >>> RECD_INT, RECP_PERSISTENT, (int) >> ssl_total_tickets_renewed_stat, >>> RecRawStatSyncCount); >>> >>> + >>> + /* error stats */ >>> + RecRegisterRawStat(ssl_rsb, RECT_PROCESS, >> "proxy.process.ssl.ssl_error_want_write", >>> + RECD_INT, RECP_PERSISTENT, (int) >> ssl_error_want_write, >>> + RecRawStatSyncCount); >>> + RecRegisterRawStat(ssl_rsb, RECT_PROCESS, >> "proxy.process.ssl.ssl_error_want_read", >>> + RECD_INT, RECP_PERSISTENT, (int) >> ssl_error_want_read, >>> + RecRawStatSyncCount); >>> + RecRegisterRawStat(ssl_rsb, RECT_PROCESS, >> "proxy.process.ssl.ssl_error_want_x509_lookup", >>> + RECD_INT, RECP_PERSISTENT, (int) >> ssl_error_want_x509_lookup, >>> + RecRawStatSyncCount); >>> + RecRegisterRawStat(ssl_rsb, RECT_PROCESS, >> "proxy.process.ssl.ssl_error_syscall", >>> + RECD_INT, RECP_PERSISTENT, (int) ssl_error_syscall, >>> + RecRawStatSyncCount); >>> + RecRegisterRawStat(ssl_rsb, RECT_PROCESS, >> "proxy.process.ssl.ssl_error_read_eos", >>> + RECD_INT, RECP_PERSISTENT, (int) >> ssl_error_read_eos, >>> + RecRawStatSyncCount); >>> + RecRegisterRawStat(ssl_rsb, RECT_PROCESS, >> "proxy.process.ssl.ssl_error_zero_return", >>> + RECD_INT, RECP_PERSISTENT, (int) >> ssl_error_zero_return, >>> + RecRawStatSyncCount); >>> + RecRegisterRawStat(ssl_rsb, RECT_PROCESS, >> "proxy.process.ssl.ssl_error_ssl", >>> + RECD_INT, RECP_PERSISTENT, (int) ssl_error_ssl, >>> + RecRawStatSyncCount); >>> + RecRegisterRawStat(ssl_rsb, RECT_PROCESS, >> "proxy.process.ssl.ssl_sni_name_set_failure", >>> + RECD_INT, RECP_PERSISTENT, (int) >> ssl_sni_name_set_failure, >>> + RecRawStatSyncCount); >>> + >>> + >>> // Get and register the SSL cipher stats. Note that we are using the >> default SSL context to obtain >>> // the cipher list. This means that the set of ciphers is fixed by the >> build configuration and not >>> // filtered by proxy.config.ssl.server.cipher_suite. This keeps the >> set of cipher suites stable across >>> >>