The most likely problem here is that the NetVC (what you get from 
entry->waitingVConns.back()) has been freed. The container only holds a raw 
pointer, and if the actual NetVC has internally been put on the free list, 
dynamic_cast<> will crash, because it has to read the object's vtable pointer 
to find its run-time type information. I think the NULL pointer is a red 
herring, an artifact of the bad NetVC pointer rather than the source.
 

    On Monday, July 18, 2016 9:34 AM, Steven R. Feltner <sfelt...@godaddy.com> 
wrote:
 

 I was finally able to turn on my SSL lazy loader plugin (temporarily) for 
production traffic.  After several minutes (a random interval between 8 and 30 
minutes), I get a seg fault at the dynamic_cast of the vc inside 
TSVConnSSLConnectionGet:


#0  0x00007ffff6e443b9 in __dynamic_cast () from /usr/lib64/libstdc++.so.6
#1  0x00000000005836fe in TSVConnSSLConnectionGet (sslp=0x6330004b1480) at 
InkAPI.cc:8746
#2  0x00007fffe32a8a9e in (anonymous namespace)::Loader_Thread 
(cont=0x62800001b6d0, event=TS_EVENT_TIMEOUT, arg=0x62b00016ec60) at 
ssl-lazy-loader.cc:704
#3  0x000000000055eedb in INKContInternal::handle_event (this=0x62800001b6d0, 
event=2, edata=0x62b00016ec60) at InkAPI.cc:1003
#4  0x00000000005311fd in Continuation::handleEvent (this=0x62800001b6d0, 
event=2, data=0x62b00016ec60) at ../iocore/eventsystem/I_Continuation.h:145
#5  0x0000000000a35595 in EThread::process_event (this=0x7fffca063800, 
e=0x62b00016ec60, calling_code=2) at UnixEThread.cc:128
#6  0x0000000000a35ebc in EThread::execute (this=0x7fffca063800) at 
UnixEThread.cc:207
#7  0x0000000000a33a93 in spawn_thread_internal (a=0x6040003c2a90) at 
Thread.cc:85
#8  0x00007ffff74a2aa1 in start_thread () from /lib64/libpthread.so.0
#9  0x00007ffff6643aad in clone () from /lib64/libc.so.6
(gdb) fr 1
#1  0x00000000005836fe in TSVConnSSLConnectionGet (sslp=0x6330004b1480) at 
InkAPI.cc:8746
8746    SSLNetVConnection *ssl_vc = dynamic_cast<SSLNetVConnection *>(vc);
(gdb) l
8741 TSVConnSSLConnectionGet(TSVConn sslp)
8742 {
8743  TSSslConnection ssl = NULL;
8744  NetVConnection *vc = reinterpret_cast<NetVConnection *>(sslp);
8745  if (vc != NULL) {
8746    SSLNetVConnection *ssl_vc = dynamic_cast<SSLNetVConnection *>(vc);
8747    if (ssl_vc != NULL) {
8748      ssl = reinterpret_cast<TSSslConnection>(ssl_vc->ssl);
8749    }
8750  }
(gdb) p vc
$1 = (NetVConnection *) 0x6330004b1480
(gdb) p sslp
$2 = (TSVConn) 0x6330004b1480
(gdb) p ssl_vc
$3 = (SSLNetVConnection *) 0x60e00026aa70



I am running this linked with ASAN.  This is from a different session than the 
gdb listing above:

ASAN:SIGSEGV
=================================================================
==16299==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000000 (pc 
0x2b624393f3b9 sp 0x2b624ca9d7f0 bp 0x000000000000 T14)
[Jul 17 17:30:59.765] Server {0x2b624dec1700} DIAG: (ssl-cert-loader-backlog) 
PUSHing the vc for iwoneworleans.org off the waitingVConns while creating the 
new entry in CB_servername
    #0 0x2b624393f3b8 in __dynamic_cast (/usr/lib64/libstdc++.so.6+0xbb3b8)
    #1 0x58379d in TSVConnSSLConnectionGet 
/home/sfeltner/projects/trafficserver/proxy/InkAPI.cc:8746
    #2 0x2b6256deda9d in Loader_Thread 
/home/sfeltner/projects/trafficserver/plugins/experimental/ssl_cert_loader/ssl-cert-loader.cc:704
    #3 0x55ef7a in INKContInternal::handle_event(int, void*) 
/home/sfeltner/projects/trafficserver/proxy/InkAPI.cc:1003
    #4 0x53129c in Continuation::handleEvent(int, void*) 
/home/sfeltner/projects/trafficserver/iocore/eventsystem/I_Continuation.h:145
    #5 0xa3579e in EThread::process_event(Event*, int) 
/home/sfeltner/projects/trafficserver/iocore/eventsystem/UnixEThread.cc:128
    #6 0xa360c5 in EThread::execute() 
/home/sfeltner/projects/trafficserver/iocore/eventsystem/UnixEThread.cc:207
    #7 0xa33c9c in spawn_thread_internal 
/home/sfeltner/projects/trafficserver/iocore/eventsystem/Thread.cc:85
    #8 0x2b6243261aa0 in start_thread (/lib64/libpthread.so.0+0x7aa0)
    #9 0x2b624410daac in __clone (/lib64/libc.so.6+0xe8aac)

AddressSanitizer can not provide additional info.
SUMMARY: AddressSanitizer: SEGV ??:0 __dynamic_cast
Thread T14 ([ET_NET 13]) created by T0 ([ET_NET 0]) here:
    #0 0x3e0cc23c7a in pthread_create (/usr/lib64/libasan.so.1+0x3e0cc23c7a)
    #1 0xa33778 in ink_thread_create ../../lib/ts/ink_thread.h:150
    #2 0xa33e25 in Thread::start(char const*, unsigned long, void* (*)(void*), 
void*) /home/sfeltner/projects/trafficserver/iocore/eventsystem/Thread.cc:100
    #3 0xa3959d in EventProcessor::start(int, unsigned long) 
/home/sfeltner/projects/trafficserver/iocore/eventsystem/UnixEventProcessor.cc:140
    #4 0x597735 in main /home/sfeltner/projects/trafficserver/proxy/Main.cc:1647
    #5 0x2b6244043d1c in __libc_start_main (/lib64/libc.so.6+0x1ed1c)

==16299==ABORTING


Here is the original code that was seg faulting:

TSSslConnection
TSVConnSSLConnectionGet(TSVConn sslp)
{
  TSSslConnection ssl = NULL;
  NetVConnection *vc = reinterpret_cast<NetVConnection *>(sslp);
  SSLNetVConnection *ssl_vc = dynamic_cast<SSLNetVConnection *>(vc);
  if (ssl_vc != NULL) {
    ssl = reinterpret_cast<TSSslConnection>(ssl_vc->ssl);
  }
  return ssl;
}

I have added some checks for NULL and this is the resulting updated code:

TSSslConnection
TSVConnSSLConnectionGet(TSVConn sslp)
{
  TSSslConnection ssl = NULL;
  NetVConnection *vc = reinterpret_cast<NetVConnection *>(sslp);
  if (vc != NULL) {
    SSLNetVConnection *ssl_vc = dynamic_cast<SSLNetVConnection *>(vc);
    if (ssl_vc != NULL) {
      ssl = reinterpret_cast<TSSslConnection>(ssl_vc->ssl);
    }
  }
  return ssl;
}

and here is the code that calls that function:


  TSMutexLock(entry->mutex);
  while (entry->waitingVConns.begin() != entry->waitingVConns.end()) {
    TSVConn vc = entry->waitingVConns.back();
    entry->waitingVConns.pop_back();
    if (vc != NULL){
      TSSslConnection sslobj = TSVConnSSLConnectionGet(vc);
      SSL *ssl = reinterpret_cast<SSL *>(sslobj);
      SSL_set_SSL_CTX(ssl, entry->ctx);
      TSDebug("redis-loader-thread", "Resolving the SSL ctx for %s in the "
              "Loader_Thread", entry->redis_CN.c_str());
      TSVConnReenable(vc);
    }


How can I be getting a NULL reference (according to ASAN) with all of these 
checks in place?  Why is the dynamic_cast referencing a NULL?  What am I 
missing?


I would appreciate any and all feedback or advice...


Thanks,

Steven

  

Reply via email to