I've run into some trouble with bacula - bacula-fd on one of my clients is
reliably crashing during full backups.  The configuration uses both TLS and
compression, and a basically identical configuration (except for certs and
hostnames) works just fine for a number of other machines.

I've attached a traceback + some config files with hostnames / passwords /
other client stuff changed/removed to protect the innocent.

Any ideas?

Nick
warning: Unable to get location for thread creation breakpoint: generic error
[New Thread 0x80b1800 (runnable)]
[New Thread 0x80b1400 (sleeping)]
[New Thread 0x80b1200 (LWP 100153)]
[New Thread 0x80a0000 (runnable)]
[New LWP 100101]
[Switching to LWP 100101]
0x280eb4f7 in pthread_testcancel () from /lib/libpthread.so.2
$1 = "client1-fd", '\0' <repeats 22 times>
$2 = 0x80a3018 "bacula-fd"
$3 = 0x80a3058 "/usr/local/sbin/bacula-fd"
$4 = 0x0
$5 = 0x808781c "2.2.3 (09 September 2007)"
$6 = 0x808192e "i386-portbld-freebsd6.2"
$7 = 0x8081926 "freebsd"
$8 = 0x8081917 "6.2-RELEASE-p5"
#0  0x280eb4f7 in pthread_testcancel () from /lib/libpthread.so.2
#1  0x280e480f in pthread_mutexattr_init () from /lib/libpthread.so.2
#2  0x08098400 in ?? ()

Thread 5 (LWP 100101):
#0  0x280eb4f7 in pthread_testcancel () from /lib/libpthread.so.2
#1  0x280e480f in pthread_mutexattr_init () from /lib/libpthread.so.2
#2  0x08098400 in ?? ()

Thread 4 (Thread 0x80a0000 (runnable)):
#0  0x284b12cf in select () from /lib/libc.so.6
#1  0x280d4091 in select () from /lib/libpthread.so.2
#2  0x08063b6a in bnet_thread_server (addrs=0xbfbfeac0, max_clients=20, 
    client_wq=0x80965c0, 
    handle_client_request=0x8051cf0 <handle_client_request(void*)>)
    at bnet_server.c:161
#3  0x0804d0f4 in main (argc=0, argv=0x0) at filed.c:227

Thread 3 (Thread 0x80b1200 (LWP 100153)):
#0  0x280eb4f7 in pthread_testcancel () from /lib/libpthread.so.2
#1  0x280e3ec8 in pthread_mutexattr_init () from /lib/libpthread.so.2
#2  0x280bc450 in ?? ()

Thread 2 (Thread 0x80b1400 (sleeping)):
#0  0x280e3f9b in pthread_mutexattr_init () from /lib/libpthread.so.2
#1  0x280e412f in pthread_mutexattr_init () from /lib/libpthread.so.2
#2  0x280e906d in _pthread_cond_timedwait () from /lib/libpthread.so.2
#3  0x280e95c2 in pthread_cond_timedwait () from /lib/libpthread.so.2
#4  0x0807b423 in watchdog_thread (arg=0x0) at watchdog.c:307
#5  0x280dc3a5 in pthread_create () from /lib/libpthread.so.2
#6  0x28499137 in _ctx_start () from /lib/libc.so.6

Thread 1 (Thread 0x80b1800 (runnable)):
#0  0x284a2167 in unlink () from /lib/libc.so.6
#1  0x08077cad in signal_handler (sig=11) at signal.c:167
#2  0x280d8a42 in sigaction () from /lib/libpthread.so.2
#3  0x280d9ee5 in sigaction () from /lib/libpthread.so.2
#4  0x280e2d31 in pthread_mutexattr_init () from /lib/libpthread.so.2
#5  0x280e2d9f in pthread_mutexattr_init () from /lib/libpthread.so.2
#6  0x28499137 in _ctx_start () from /lib/libc.so.6
#7  0x00000000 in ?? ()
#8  0xbf6f7290 in ?? ()
#9  0xbf6f6fd0 in ?? ()
#10 0x00000000 in ?? ()
#11 0x280e2d5c in pthread_mutexattr_init () from /lib/libpthread.so.2
#12 0x080612bf in _p (m=0x8096abc) at berrno.h:90
#13 0x08072c41 in dequeue_messages (jcr=0x80bd418) at message.c:1350
#14 0x0806e66d in b_free_jcr (file=0x808731c "jcr.c", line=716, jcr=0x80bd418)
    at jcr.c:426
#15 0x0806ed8c in jcr_walk_end (jcr=0x2) at jcr.c:716
#16 0x0806e8db in get_jcr_from_tid (tid=0x80b1800) at jcr.c:496
#17 0x0806e887 in get_jcr_from_tid () at jcr.c:482
#18 0x08073acc in openssl_post_errors (code=4, 
    errstring=0x80895d4 "TLS read/write failure.") at openssl.c:69
#19 0x08079a0c in tls_bsock_writen (bsock=0x80b9118, ptr=0x80bd824 "", 
    nbytes=240) at tls.c:639
#20 0x080628ae in write_nbytes (bsock=0x80b9118, ptr=0x80bd824 "", nbytes=240)
    at bnet.c:130
#21 0x08065541 in BSOCK::send (this=0x80b9118) at bsock.c:292
#22 0x0806321b in bnet_fsend (bs=0x80b9118, 
    fmt=0x8087f08 "Jmsg Job=%s type=%d level=%d %s") at bnet.c:571
#23 0x08071dd7 in dispatch_message (jcr=0x80bd418, type=4, mtime=1190417924, 
    msg=0xbf6f8aa0 "client1-fd: backup-client1.xxx.com.2007-09-21_22.28.40 
Error: bsock.c:311 Wrote 47554 bytes to Storage 
daemon:server1.netability.ie:9103, but only 16384 accepted.\n") at message.c:776
#24 0x08072706 in Jmsg (jcr=0x80bd418, type=4, mtime=1190417924, 
    fmt=0x808a735 "%s") at message.c:1146
#25 0x08072c70 in dequeue_messages (jcr=0x80bd418) at message.c:1356
#26 0x0806e66d in b_free_jcr (file=0x808731c "jcr.c", line=716, jcr=0x80bd418)
    at jcr.c:426
#27 0x0806ed8c in jcr_walk_end (jcr=0x2) at jcr.c:716
#28 0x0806e8db in get_jcr_from_tid (tid=0x80b1800) at jcr.c:496
#29 0x0806e887 in get_jcr_from_tid () at jcr.c:482
#30 0x08073acc in openssl_post_errors (code=4, 
    errstring=0x80895d4 "TLS read/write failure.") at openssl.c:69
#31 0x08079a0c in tls_bsock_writen (bsock=0x80b9118, ptr=0x80bd824 "", 
    nbytes=200) at tls.c:639
#32 0x080628ae in write_nbytes (bsock=0x80b9118, ptr=0x80bd824 "", nbytes=200)
    at bnet.c:130
#33 0x08065541 in BSOCK::send (this=0x80b9118) at bsock.c:292
#34 0x0806321b in bnet_fsend (bs=0x80b9118, 
    fmt=0x8087f08 "Jmsg Job=%s type=%d level=%d %s") at bnet.c:571
#35 0x08071dd7 in dispatch_message (jcr=0x80bd418, type=3, mtime=1190417924, 
    msg=0xbf6fa230 "client1-fd: backup-client1.xxx.com.2007-09-21_22.28.40 
Fatal error: backup.c:892 Network send error to SD. ERR=Broken pipe\n")
    at message.c:776
#36 0x08072706 in Jmsg (jcr=0x80bd418, type=3, mtime=0, fmt=0x808a735 "%s")
    at message.c:1146
#37 0x08072811 in j_msg (file=0x8081001 "backup.c", line=892, jcr=0x80bd418, 
    type=3, mtime=0, fmt=0x8080e38 "Network send error to SD. ERR=%s\n")
    at message.c:1184
#38 0x080504cc in send_data (jcr=0x80bd418, stream=4, ff_pkt=0x80b1e18, 
    digest=0x80ca718, signing_digest=0x0) at backup.c:891
#39 0x0804f81b in save_file (ff_pkt=0x80b1e18, vjcr=0x80bd418, top_level=false)
    at backup.c:476
#40 0x0805b1b3 in our_callback (ff=0x80b1e18, hpkt=0x80bd418, top_level=false)
    at find.c:395
#41 0x0805bd7f in find_one_file (jcr=0x80bd418, ff_pkt=0x80b1e18, 
    handle_file=0x805b15c <our_callback>, pkt=0x80bd418, 
    fname=0x8159618 "/data/mailman/archives/private/members.mbox/members.mbox", 
parent_device=2, top_level=false) at find_one.c:430
#42 0x0805c4c7 in find_one_file (jcr=0x80bd418, ff_pkt=0x80b1e18, 
    handle_file=0x805b15c <our_callback>, pkt=0x80bd418, 
    fname=0x8159018 "/data/mailman/archives/private/members.mbox", 
    parent_device=2, top_level=false) at find_one.c:638
#43 0x0805c4c7 in find_one_file (jcr=0x80bd418, ff_pkt=0x80b1e18, 
    handle_file=0x805b15c <our_callback>, pkt=0x80bd418, 
    fname=0x80f4018 "/data/mailman/archives/private", parent_device=2, 
    top_level=false) at find_one.c:638
#44 0x0805c4c7 in find_one_file (jcr=0x80bd418, ff_pkt=0x80b1e18, 
    handle_file=0x805b15c <our_callback>, pkt=0x80bd418, 
    fname=0x80f2d18 "/data/mailman/archives", parent_device=2, top_level=false)
    at find_one.c:638
#45 0x0805c4c7 in find_one_file (jcr=0x80bd418, ff_pkt=0x80b1e18, 
    handle_file=0x805b15c <our_callback>, pkt=0x80bd418, 
    fname=0x80f2618 "/data/mailman", parent_device=2, top_level=false)
    at find_one.c:638
#46 0x0805c4c7 in find_one_file (jcr=0x80bd418, ff_pkt=0x80b1e18, 
    handle_file=0x805b15c <our_callback>, pkt=0x80bd418, 
    fname=0x80b6360 "/data", parent_device=2, top_level=true) at find_one.c:638
#47 0x0805a8b6 in find_files (jcr=0x80bd418, ff=0x80b1e18, callback=0x2, 
    his_pkt=0x80bd418) at find.c:200
#48 0x0804ea80 in blast_data_to_storage_daemon (jcr=0x80bd418, addr=0x0)
    at backup.c:158
#49 0x08054d0b in backup_cmd (jcr=0x80bd418) at job.c:1437
#50 0x08051fc1 in handle_client_request (dirp=0x80b9118) at job.c:250
#51 0x0807bd89 in workq_server (arg=0x80965c0) at workq.c:357
#52 0x280dc3a5 in pthread_create () from /lib/libpthread.so.2
#53 0x28499137 in _ctx_start () from /lib/libc.so.6
#0  0x280eb4f7 in pthread_testcancel () from /lib/libpthread.so.2
#0  0x280eb4f7 in pthread_testcancel () from /lib/libpthread.so.2
No symbol table info available.
#1  0x280e480f in pthread_mutexattr_init () from /lib/libpthread.so.2
No symbol table info available.
#2  0x08098400 in ?? ()
No symbol table info available.
#0  0x00000000 in ?? ()
No symbol table info available.
#0  0x00000000 in ?? ()
No symbol table info available.
#0  0x00000000 in ?? ()
No symbol table info available.
#0  0x00000000 in ?? ()
No symbol table info available.
#0  0x00000000 in ?? ()
No symbol table info available.
#
# List Directors who are permitted to contact this File daemon
#
Director {
  Name = server1-dir
  Password = "<gone>"

  TLS Enable = yes
  TLS Require = yes
  TLS Verify Peer = yes

  # Allow only the Director to connect
  TLS Allowed CN = "server1.netability.ie"

  TLS CA Certificate File = /opt/local/etc/bacula/cacert.pem
  TLS Certificate = /opt/local/etc/bacula/bacula-client1.xxx.com-cert.pem
  TLS Key = /opt/local/etc/bacula/bacula-client1.xxx.com-key.pem
}

#
# Restricted Director, used by tray-monitor to get the
#   status of the file daemon
#
Director {
  Name = client1-mon
  Password = "<gone>"
  Monitor = yes
}

#
# "Global" File daemon configuration specifications
#
FileDaemon {                          # this is me
  Name = client1-fd
  FDport = 9102                  # where we listen for the director
  WorkingDirectory = /var/db/bacula
  Pid Directory = /var/run
  Maximum Concurrent Jobs = 2

  TLS Enable = yes
  TLS Require = yes
 
  TLS CA Certificate File = /opt/local/etc/bacula/cacert.pem
  TLS Certificate = /opt/local/etc/bacula/bacula-client1.xxx.com-cert.pem
  TLS Key = /opt/local/etc/bacula/bacula-client1.xxx.com-key.pem
}

# Send all messages except skipped files back to Director
Messages {
  Name = Standard
  director = server1-dir = all, !skipped, !restored
}
Director {                            # define myself
  Name = server1-dir
  DIRport = 9101                # where we listen for UA connections
  QueryFile = "/usr/local/share/bacula/query.sql"
  WorkingDirectory = "/var/db/bacula"
  PidDirectory = "/var/run"
  Maximum Concurrent Jobs = 1
  Password = "<gone>"
  Messages = Daemon

  TLS Enable = yes 
  TLS Require = yes
  TLS Verify Peer = yes

  # Allow only the Director to connect
  TLS Allowed CN = "server1.netability.ie"

  TLS Allowed CN = "client1.xxx.com"

  TLS CA Certificate File = /opt/local/etc/bacula/cacert.pem
  TLS Certificate = /opt/local/etc/bacula/bacula-server1.netability.ie-cert.pem
  TLS Key = /opt/local/etc/bacula/bacula-server1.netability.ie-key.pem
}

Job {
  Name = "backup-client1.xxx.com"
  Type = Backup
  Client = client1.xxx.com-fd
  FileSet = "FileSet_client1.xxx.com"
  Schedule = "WeeklyCycle"
  Storage = File_XXX_COM
  Messages = Standard
  Pool = Default
  Full Backup Pool = Pool-Full-XXX_COM
  Incremental Backup Pool = Pool-Increm-XXX_COM
  Differential Backup Pool = Pool-Diff-XXX_COM
  Write Bootstrap = "/var/db/bacula/client1.xxx.com.bsr"
  Priority = 10
}

FileSet {
  Name = "FileSet_client1.xxx.com"
  Include {
    Options {
      WildFile = "*.core"
      WildFile = "*.bak"
      WildFile = "*~"
      WildFile = ".*~"
      WildFile = ".#*"
      WildFile = "/var/db/mysql/*.log*"
      exclude = yes      
      signature = SHA1
      compression = gzip
    }

    File = /data
  }
  Exclude {
    File = /proc
    File = /tmp
    File = /dev
    File = /.journal
    File = /.fsck
    File = /.snap
    File = /var/crash
    File = /var/tmp
    File = /var/db/clamav
    File = /var/db/portsnap
    File = /data/usr.ports
    File = /data/qugh-restore
    File = /data/src
  }
}


# This is the backup of the catalog
FileSet {
  Name = "Catalog"
  Include {
    Options { signature = SHA1 }
    File = /var/db/bacula/bacula.sql
  }
}


#
# When to do the backups: full backup on the first Sunday of the month,
#  differential (i.e. incremental since full) every other Sunday,
#  and incremental backups on the other days
Schedule {
  Name = "WeeklyCycle"
  Run = Full 1st sun at 02:05
  Run = Differential 2nd-5th sun at 02:05
  Run = Incremental mon-sat at 02:05
}

# This schedule does the catalog. It starts after the WeeklyCycle
Schedule {
  Name = "WeeklyCycleAfterBackup"
  Run = Full sun-sat at 03:10
}

Client {
  Name = client1.xxx.com-fd
  Address = client1.xxx.com
  FDPort = 9102
  Catalog = GenericCatalog
  Password = "<gone>"
  File Retention = 20 days
  Job Retention = 6 months
  AutoPrune = yes                     # Prune expired Jobs/Files

  TLS Enable = yes
  TLS Require = yes

  TLS CA Certificate File = /opt/local/etc/bacula/cacert.pem
  TLS Certificate = /opt/local/etc/bacula/bacula-server1.netability.ie-cert.pem
  TLS Key = /opt/local/etc/bacula/bacula-server1.netability.ie-key.pem
}

Storage {
  Name = File_XXX_COM
  Address = server1.netability.ie                # N.B. Use a fully qualified name here
  SDPort = 9103
  Password = "<gone>"
  Device = FileStorage_XXX_COM
  Media Type = File
}

# Generic catalog service
Catalog {
  Name = GenericCatalog
  dbname = bacula; user = bacula; password = "<gone>"
}

# Reasonable message delivery -- send most everything to email address
#  and to the console
Messages {
  Name = Standard
  mailcommand = "/usr/local/sbin/bsmtp -h localhost -f \"\(Bacula\) %r\" -s \"Bacula: %t %e of %c %l\" %r"
  operatorcommand = "/usr/local/sbin/bsmtp -h localhost -f \"\(Bacula\) %r\" -s \"Bacula: Intervention needed for %j\" %r"
  mail = [EMAIL PROTECTED] = all, !skipped            
  operator = [EMAIL PROTECTED] = mount
  console = all, !skipped, !saved
  append = "/var/db/bacula/log" = all, !skipped

}

#
# Message delivery for daemon messages (no job).
Messages {
  Name = Daemon
  mailcommand = "/usr/local/sbin/bsmtp -h localhost -f \"\(Bacula\) %r\" -s \"Bacula daemon message\" %r"
  mail = [EMAIL PROTECTED] = all, !skipped            
  console = all, !skipped, !saved
  append = "/var/db/bacula/log" = all, !skipped
}

#
# Restricted console used by tray-monitor to get the status of the director
#
Console {
  Name = server1-mon
  Password = "<gone>"
  CommandACL = status, .status
}

# Default pool definition
Pool {
  Name = Default
  Pool Type = Backup
  Recycle = yes                       # Bacula can automatically recycle Volumes
  AutoPrune = yes                     # Prune expired volumes
  Volume Retention = 365 days         # one year
}

Pool {
  Name = Pool-Full-XXX_COM
  Pool Type = Backup
  Recycle = yes
  AutoPrune = yes
  Volume Retention = 6 months
  Maximum Volume Jobs = 1
  Label Format = Pool-Full-XXX_COM-
  Maximum Volumes = 6
}

Pool {
  Name = Pool-Diff-XXX_COM
  Pool Type = Backup
  Recycle = yes
  AutoPrune = yes
  Volume Retention = 2 months
  Maximum Volume Jobs = 1
  Label Format = Pool-Diff-XXX_COM-
  Maximum Volumes = 10
}

Pool {
  Name = Pool-Increm-XXX_COM
  Pool Type = Backup
  Recycle = yes
  AutoPrune = yes
  Volume Retention = 20 days
  Maximum Volume Jobs = 1
  Label Format = Pool-Increm-XXX_COM-
  Maximum Volumes = 20
}

-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Bacula-users mailing list
Bacula-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/bacula-users

Reply via email to