On Thu, 17 Nov 2005, Daniel Holtkamp wrote:

Hi !

Luke Dean wrote:

Last month I reported a problem where my system would hang whenever I ran multiple concurrent jobs that backup to a single RAID array after upgrading from bacula 1.36.2 to 1.36.3. This problem still persists in 1.38.1.

I'm still using 1.37.40, but since you seem to have the problem in 1.36.2 and 1.38.1, I don't think that matters ...

I did not have the problem in 1.36.2. The problem was introduced in 1.36.3. Someone else on this list confirmed that earlier. I never tried anything in the 1.37 series though.

I have about 10 clients that all do a backup at 1:05. Each client has its own job, its own pool and its own storage device. I don't know if by four storage daemons you mean 4x bacula-sd, but I'm only running each service once.

I'm also running one bacula-sd. I've got four "Storage" sections in bacula-dir.conf, each with a unique "Name" and "Device". I tried unique "Media Type"s too, but that didn't help.


Max concurrent jobs is set to 10 of course.

All these backups go to a 350GB Raid-5 array and the directory structure is like this:
./storage/clientA/clientA.0001
./storage/clientB/clientB.0001
etc

I'm having absolutely NO problems whatsoever with all jobs concurrently writing to the raid device.

I was backing up to
/mirror/bacula
That locks up within about 10 seconds of four jobs running concurrently.
I reconfigured it to back up to
/mirror/bacula/client1
/mirror/bacula/client2
etc...
It takes several minutes to lock up the machine that way, but it still freezes.

Maybe the problem is somewhere else ?

Maybe. Are you doing software raid or hardware raid? I was using hardware raid in 1.36.2 when I didn't have problems. Now I'm using FreeBSD's gmirror as software raid. I really doubt it matters, but perhaps the internal locking is handled differently enough to be a problem.

I'll post the configuration that produces the lockup, in case anyone wants to check it for mistakes. Here's bacula-dir.conf and bacula-sd.conf. Everything is bacula version 1.38.1, even the file daemons on the windows machines. "border" is running FreeBSD 6, and I got the same problems with FreeBSD 5.4. I'm using the 4BSD scheduler.
/mirror is a gmirror.
This produces a hard freeze on "border" just a few seconds after multiple jobs start accessing the storage daemon at the same time. Making the changes in the storage daemon to have each "Device" point to a different subdirectory delays the freeze by several minutes. Changing the configuration to only have one "Storage" defined or cutting back "Maximum Concurrent Jobs" on the director to 1 avoids the freeze.

bacula-dir.conf
--------------------------------------------------------------
# Director resource: the director's own identity and global limits.
Director {                            # define myself
  Name = border-dir
  DIRport = 9101                # where we listen for UA connections
  QueryFile = "/usr/local/share/bacula/query.sql"
  WorkingDirectory = "/var/db/bacula"
  PidDirectory = "/var/run"
  # Per the report in this message, the freeze is seen with this value at 4;
  # reducing it to 1 avoids the freeze.
  Maximum Concurrent Jobs = 4
  FD Connect Timeout = 60 min  #retry for an hour
  Password = "xxx"         # Console password
  Messages = Standard
}

# Shared defaults for every Job that sets JobDefs = "DefaultJob".
# The per-client backup Jobs override Storage, Pool and Schedule (and, for
# greentower and border, FileSet); BackupCatalog keeps Storage = File and
# Pool = Default from here.
JobDefs {
  Name = "DefaultJob"
  Type = Backup
  Level = Incremental
  FileSet = "Windows"
  Storage = File
  Messages = Standard
  Pool = Default
  Priority = 10
  Max Start Delay = 82800  #23 hours
}

# Per-client backup Jobs. Each one has its own Storage, Pool and Schedule.
# All four schedules (WeeklyCycle1..4) run at 2:05, so these jobs are
# scheduled to start at the same time -- the concurrent case that triggers
# the freeze described in this message.

# Abigail: uses the "Windows" FileSet inherited from DefaultJob.
Job {
  Name = "Abigail"
  JobDefs = "DefaultJob"
  Client = "Abigail-fd"
  Write Bootstrap = "/var/db/bacula/Abigail.bsr"
  Storage = AbigailFile
  Pool = Abigail
  Schedule = "WeeklyCycle1"
}
# Tani: uses the "Windows" FileSet inherited from DefaultJob.
Job {
  Name = "Tani"
  JobDefs = "DefaultJob"
  Client = "Tani-fd"
  Write Bootstrap = "/var/db/bacula/Tani.bsr"
  Storage = TaniFile
  Pool = Tani
  Schedule = "WeeklyCycle2"
}
# greentower: overrides the default FileSet with its own.
Job {
  Name = "greentower"
  JobDefs = "DefaultJob"
  Client = "greentower-fd"
  Write Bootstrap = "/var/db/bacula/greentower.bsr"
  Storage = greentowerFile
  Pool = greentower
  Schedule = "WeeklyCycle3"
  FileSet = "greentowerFileSet"
}
# border: the backup server itself (same host name as the Storage Address).
Job {
  Name = "border"
  JobDefs = "DefaultJob"
  Client = "border-fd"
  Write Bootstrap = "/var/db/bacula/border.bsr"
  Storage = borderFile
  Pool = border
  Schedule = "WeeklyCycle4"
  FileSet = "borderFileSet"
}


# Backup the catalog database (after the nightly save)
# Storage and Pool are not set here, so they come from DefaultJob
# (Storage = File, Pool = Default). Scheduled at 2:10 with Priority 11,
# versus 2:05 and Priority 10 for the client backups.
Job {
  Name = "BackupCatalog"
  JobDefs = "DefaultJob"
  Client = "border-fd"
  Level = Full
  FileSet="Catalog"
  Schedule = "WeeklyCycleAfterBackup"
  # This creates an ASCII copy of the catalog
  RunBeforeJob = "/usr/local/share/bacula/make_catalog_backup bacula bacula"
  ## This deletes the copy of the catalog
  ## (disabled: the RunAfterJob line below is commented out)
  #RunAfterJob  = "/usr/local/share/bacula/delete_catalog_backup"
  Write Bootstrap = "/var/db/bacula/BackupCatalog.bsr"
  Priority = 11                   # run after main backup
}

# Restore Jobs, one per client. None of them uses JobDefs or a Schedule;
# each names its client's Storage, Pool and FileSet explicitly and restores
# into /tmp/bacula-restores.
Job {
  Name = "RestoreAbigail"
  Type = Restore
  Client=Abigail-fd
  FileSet="Windows"
  Storage = AbigailFile
  Pool = Abigail
  Messages = Standard
  Where = /tmp/bacula-restores
}
Job {
  Name = "RestoreTani"
  Type = Restore
  Client=Tani-fd
  FileSet="Windows"
  Storage = TaniFile
  Pool = Tani
  Messages = Standard
  Where = /tmp/bacula-restores
}
Job {
  Name = "Restoregreentower"
  Type = Restore
  Client=greentower-fd
  FileSet="greentowerFileSet"
  Storage = greentowerFile
  Pool = greentower
  Messages = Standard
  Where = /tmp/bacula-restores
}
Job {
  Name = "Restoreborder"
  Type = Restore
  Client=border-fd
  FileSet="borderFileSet"
  Storage = borderFile
  Pool = border
  Messages = Standard
  Where = /tmp/bacula-restores
}


# FileSets. All of them use MD5 signatures and gzip compression.

# Default FileSet for the Windows clients: the whole C: drive.
FileSet {
   Name = "Windows"
   Include {
      Options {
         signature=MD5
         compression=gzip
      }
      File = C:/
   }
}
# FileSet for the backup server itself. /mirror is included, but
# /mirror/bacula (the Archive Device directory in bacula-sd.conf) is
# excluded so the backup volumes are not backed up into themselves.
FileSet {
   Name = "borderFileSet"
   Include {
      Options {
         signature=MD5
         compression=gzip
      }
      File = /usr/home
      File = /root
      File = /etc
      File = /usr/local/etc
      File = /var/mail
      File = /var/log
      File = /mirror
   }
   Exclude {
      File = /mirror/bacula
   }
}
# FileSet for the greentower client.
FileSet {
   Name = "greentowerFileSet"
   Include {
      Options {
         signature=MD5
         compression=gzip
      }
      File = /home
      File = /root
      File = /etc
      File = /usr/local/etc
      File = /var/log
   }
}
# This is the backup of the catalog
# (the single file named below; presumably the dump written by the
# make_catalog_backup script run by the BackupCatalog job -- verify)
FileSet {
  Name = "Catalog"
  Include {
    Options {
      signature = MD5
      compression=gzip
    }
    File = /var/db/bacula/bacula.sql
  }
}

# Schedules. WeeklyCycle1..4 differ only in which Sunday of the month gets
# the Full backup (1st, 2nd, 3rd, 4th); the other Sundays get a Differential
# and Monday-Saturday get an Incremental. Every run is at 2:05, so all four
# client jobs are scheduled to start together.
Schedule {
  Name = "WeeklyCycle1"
  Run = Full 1st sun at 2:05
  Run = Differential 2nd-5th sun at 2:05
  Run = Incremental mon-sat at 2:05
}
Schedule {
  Name = "WeeklyCycle2"
  Run = Full 2nd sun at 2:05
  Run = Differential 1st,3rd-5th sun at 2:05
  Run = Incremental mon-sat at 2:05
}
Schedule {
  Name = "WeeklyCycle3"
  Run = Full 3rd sun at 2:05
  Run = Differential 1st-2nd,4th-5th sun at 2:05
  Run = Incremental mon-sat at 2:05
}
Schedule {
  Name = "WeeklyCycle4"
  Run = Full 4th sun at 2:05
  Run = Differential 1st-3rd,5th sun at 2:05
  Run = Incremental mon-sat at 2:05
}
# This schedule does the catalog. It is scheduled daily at 2:10, five
# minutes after the WeeklyCycle1..4 runs at 2:05.
Schedule {
  Name = "WeeklyCycleAfterBackup"
  Run = Full sun-sat at 2:10
}

# Client (File Services) to backup
# One resource per file daemon. All four use FDPort 9102, the MyCatalog
# catalog, and the same retention settings.
Client {
  Name = Abigail-fd
  Address = Abigail
  FDPort = 9102
  Catalog = MyCatalog
  Password = "xxx"  # password for FileDaemon
  File Retention = 30 days            # 30 days
  Job Retention = 6 months            # six months
  AutoPrune = yes                     # Prune expired Jobs/Files
}
Client {
  Name = Tani-fd
  Address = Tani
  FDPort = 9102
  Catalog = MyCatalog
  Password = "xxx"          # password for FileDaemon
  File Retention = 30 days            # 30 days
  Job Retention = 6 months            # six months
  AutoPrune = yes                     # Prune expired Jobs/Files
}
Client {
  Name = greentower-fd
  Address = greentower
  FDPort = 9102
  Catalog = MyCatalog
  Password = "xxx"          # password for FileDaemon
  File Retention = 30 days            # 30 days
  Job Retention = 6 months            # six months
  AutoPrune = yes                     # Prune expired Jobs/Files
}
# File daemon on the backup server itself; also used by the BackupCatalog job.
Client {
  Name = border-fd
  Address = border
  FDPort = 9102
  Catalog = MyCatalog
  Password = "xxx"          # password for FileDaemon
  File Retention = 30 days            # 30 days
  Job Retention = 6 months            # six months
  AutoPrune = yes                     # Prune expired Jobs/Files
}



# Definition of file storage devices
# All five Storage resources point at the same storage daemon (Address =
# border, SDPort = 9103) and share Media Type = File; they differ only in
# Name and Device. Per the message, defining only one Storage avoids the
# freeze, and giving each a unique Media Type did not help.
Storage {
  Name = File
  Address = border            # N.B. Use a fully qualified name here
  SDPort = 9103
  Password = "xxx"
  Device = FileStorage
  Media Type = File
}
Storage {
  Name = AbigailFile
  Address = border            # N.B. Use a fully qualified name here
  SDPort = 9103
  Password = "xxx"
  Device = AbigailFileStorage
  Media Type = File
}
Storage {
  Name = TaniFile
  Address = border            # N.B. Use a fully qualified name here
  SDPort = 9103
  Password = "xxx"
  Device = TaniFileStorage
  Media Type = File
}
Storage {
  Name = greentowerFile
  Address = border            # N.B. Use a fully qualified name here
  SDPort = 9103
  Password = "xxx"
  Device = greentowerFileStorage
  Media Type = File
}
Storage {
  Name = borderFile
  Address = border            # N.B. Use a fully qualified name here
  SDPort = 9103
  Password = "xxx"
  Device = borderFileStorage
  Media Type = File
}

# Generic catalog service
# Referenced by every Client resource (Catalog = MyCatalog).
Catalog {
  Name = MyCatalog
  # Database "bacula", user "bacula", empty password.
  dbname = bacula; user = bacula; password = ""
}

#Messages section removed for this email

# Default pool: used by jobs that keep Pool = Default from DefaultJob
# (the BackupCatalog job); volumes are labelled "Catalog-".
Pool {
  Name = Default
  LabelFormat = "Catalog-"
  Pool Type = Backup
  Recycle = yes                       # Bacula can automatically recycle Volumes
  AutoPrune = yes                     # Prune expired volumes
  Volume Retention = 365 days         # one year
  Accept Any Volume = yes             # write on any volume in the pool
}
# Per-client pools. The four below are identical apart from Name and
# LabelFormat: one job per volume, 32-day volume retention, and at most
# 33 volumes per pool.
Pool {
   Name = Abigail
   LabelFormat = "Abigail-"
   Pool Type = Backup
   Recycle = yes
   AutoPrune = yes
   Volume Retention = 32 days
   Maximum Volume Jobs = 1
   Maximum Volumes = 33
}
Pool {
   Name = Tani
   LabelFormat = "Tani-"
   Pool Type = Backup
   Recycle = yes
   AutoPrune = yes
   Volume Retention = 32 days
   Maximum Volume Jobs = 1
   Maximum Volumes = 33
}
Pool {
   Name = greentower
   LabelFormat = "greentower-"
   Pool Type = Backup
   Recycle = yes
   AutoPrune = yes
   Volume Retention = 32 days
   Maximum Volume Jobs = 1
   Maximum Volumes = 33
}
Pool {
   Name = border
   LabelFormat = "border-"
   Pool Type = Backup
   Recycle = yes
   AutoPrune = yes
   Volume Retention = 32 days
   Maximum Volume Jobs = 1
   Maximum Volumes = 33
}

bacula-sd.conf
-------------------------------------------------------------
# The storage daemon's own identity. Only one bacula-sd is run; all five
# Storage resources in bacula-dir.conf connect to it.
Storage {                             # definition of myself
  Name = border-sd
  SDPort = 9103                  # same port as SDPort in the director's Storage resources
  WorkingDirectory = "/var/db/bacula"
  Pid Directory = "/var/run"
  # Higher than the director's Maximum Concurrent Jobs = 4.
  Maximum Concurrent Jobs = 20
}

# The director allowed to use this storage daemon; Name matches the
# Director resource in bacula-dir.conf.
Director {
  Name = border-dir
  Password = "xxx"
}

# File-backed Devices, one per Storage resource in bacula-dir.conf.
# In this configuration (the one that freezes within seconds) all five
# share the same Archive Device directory, /mirror/bacula, and the same
# Media Type. The "also tried" comments record the per-device
# subdirectory variant, which per the message only delayed the freeze
# by several minutes.

# Device behind the director's "File" Storage.
Device {
  Name = FileStorage
  Media Type = File
  Archive Device = /mirror/bacula
  # also tried /mirror/bacula/default
  LabelMedia = yes;                   # lets Bacula label unlabeled media
  Random Access = Yes;
  AutomaticMount = yes;               # when device opened, read it
  RemovableMedia = no;
  AlwaysOpen = no;
}
# The four per-client Devices below differ only in Name (and in the
# subdirectory noted in the "also tried" comment).
Device {
   Name = AbigailFileStorage
   Archive Device = /mirror/bacula
   #also tried /mirror/bacula/Abigail
   Media Type = File
   Always Open = no
   Removable Media = no
   Random Access = yes
   Label Media = yes
   Automatic Mount = yes
}
Device {
   Name = TaniFileStorage
   Archive Device = /mirror/bacula
   #also tried /mirror/bacula/Tani
   Media Type = File
   Always Open = no
   Removable Media = no
   Random Access = yes
   Label Media = yes
   Automatic Mount = yes
}
Device {
   Name = greentowerFileStorage
   Archive Device = /mirror/bacula
   #also tried /mirror/bacula/greentower
   Media Type = File
   Always Open = no
   Removable Media = no
   Random Access = yes
   Label Media = yes
   Automatic Mount = yes
}
Device {
   Name = borderFileStorage
   Archive Device = /mirror/bacula
   #also tried /mirror/bacula/border
   Media Type = File
   Always Open = no
   Removable Media = no
   Random Access = yes
   Label Media = yes
   Automatic Mount = yes
}

# Storage daemon message routing: everything ("all") goes to border-dir.
Messages {
  Name = Standard
  director = border-dir = all
}


-------------------------------------------------------
This SF.Net email is sponsored by the JBoss Inc.  Get Certified Today
Register for a JBoss Training Course.  Free Certification Exam
for All Training Attendees Through End of 2005. For more info visit:
http://ads.osdn.com/?ad_id=7628&alloc_id=16845&op=click
_______________________________________________
Bacula-users mailing list
Bacula-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/bacula-users

Reply via email to