On Thu, 17 Nov 2005, Daniel Holtkamp wrote:
Hi !
Luke Dean wrote:
Last month I reported a problem where my system would hang whenever I ran
multiple concurrent jobs that backup to a single RAID array after upgrading
from bacula 1.36.2 to 1.36.3. This problem still persists in 1.38.1.
I'm still using 1.37.40, but since you seem to have the problem in 1.36.2 and
1.38.1 I don't think that matters ...
I did not have the problem in 1.36.2. The problem was introduced in
1.36.3. Someone else on this list confirmed that earlier. I never tried
anything in the 1.37 series though.
I have about 10 clients that all do a backup at 1:05. Each client has its own
job, its own pool and its own storage device. I don't know if by four storage
daemons you mean 4x bacula-sd, but I'm only running each service once.
I'm also running one bacula-sd. I've got four "Storage" sections in
bacula-dir.conf, each with a unique "Name" and "Device". I tried unique
"Media Type"s too, but that didn't help.
Max concurrent jobs is set to 10 of course.
All these backups go to a 350GB Raid-5 array and the directory structure is
like this:
./storage/clientA/clientA.0001
./storage/clientB/clientB.0001
etc
I'm having absolutely NO problems whatsoever with all jobs concurrently
writing to the RAID device.
I was backing up to
/mirror/bacula
That locks up within about 10 seconds of four jobs running concurrently.
I reconfigured it to back up to
/mirror/bacula/client1
/mirror/bacula/client2
etc...
It takes several minutes to lock up the machine that way, but it still
freezes.
Maybe the problem is somewhere else ?
Maybe. Are you doing software raid or hardware raid? I was using
hardware raid in 1.36.2 when I didn't have problems. Now I'm using
FreeBSD's gmirror as software raid. I really doubt it matters, but
perhaps the internal locking is handled differently enough to be a
problem.
I'll post the configuration that produces the lockup, in case anyone
wants to check it for mistakes. Here's bacula-dir.conf and
bacula-sd.conf.
Everything is bacula version 1.38.1, even the file daemons on the windows
machines.
"border" is running FreeBSD 6, and I got the same problems with FreeBSD 5.4.
I'm using the 4BSD scheduler.
/mirror is a gmirror.
This produces a hard freeze on "border" just a few seconds after multiple
jobs start accessing the storage daemon at the same time. Making the
changes in the storage daemon to have each "Device" point to a different
subdirectory delays the freeze by several minutes. Changing the
configuration to only have one "Storage" defined or cutting back "Maximum
Concurrent Jobs" on the director to 1 avoids the freeze.
bacula-dir.conf
--------------------------------------------------------------
Director { # define myself
Name = border-dir
DIRport = 9101 # where we listen for UA connections
QueryFile = "/usr/local/share/bacula/query.sql"
WorkingDirectory = "/var/db/bacula"
PidDirectory = "/var/run"
Maximum Concurrent Jobs = 4
FD Connect Timeout = 60 min #retry for an hour
Password = "xxx" # Console password
Messages = Standard
}
JobDefs {
Name = "DefaultJob"
Type = Backup
Level = Incremental
FileSet = "Windows"
Storage = File
Messages = Standard
Pool = Default
Priority = 10
Max Start Delay = 82800 #23 hours
}
Job {
Name = "Abigail"
JobDefs = "DefaultJob"
Client = "Abigail-fd"
Write Bootstrap = "/var/db/bacula/Abigail.bsr"
Storage = AbigailFile
Pool = Abigail
Schedule = "WeeklyCycle1"
}
Job {
Name = "Tani"
JobDefs = "DefaultJob"
Client = "Tani-fd"
Write Bootstrap = "/var/db/bacula/Tani.bsr"
Storage = TaniFile
Pool = Tani
Schedule = "WeeklyCycle2"
}
Job {
Name = "greentower"
JobDefs = "DefaultJob"
Client = "greentower-fd"
Write Bootstrap = "/var/db/bacula/greentower.bsr"
Storage = greentowerFile
Pool = greentower
Schedule = "WeeklyCycle3"
FileSet = "greentowerFileSet"
}
Job {
Name = "border"
JobDefs = "DefaultJob"
Client = "border-fd"
Write Bootstrap = "/var/db/bacula/border.bsr"
Storage = borderFile
Pool = border
Schedule = "WeeklyCycle4"
FileSet = "borderFileSet"
}
# Backup the catalog database (after the nightly save)
Job {
Name = "BackupCatalog"
JobDefs = "DefaultJob"
Client = "border-fd"
Level = Full
FileSet="Catalog"
Schedule = "WeeklyCycleAfterBackup"
# This creates an ASCII copy of the catalog
RunBeforeJob = "/usr/local/share/bacula/make_catalog_backup bacula bacula"
## This deletes the copy of the catalog
#RunAfterJob = "/usr/local/share/bacula/delete_catalog_backup"
Write Bootstrap = "/var/db/bacula/BackupCatalog.bsr"
Priority = 11 # run after main backup
}
Job {
Name = "RestoreAbigail"
Type = Restore
Client=Abigail-fd
FileSet="Windows"
Storage = AbigailFile
Pool = Abigail
Messages = Standard
Where = /tmp/bacula-restores
}
Job {
Name = "RestoreTani"
Type = Restore
Client=Tani-fd
FileSet="Windows"
Storage = TaniFile
Pool = Tani
Messages = Standard
Where = /tmp/bacula-restores
}
Job {
Name = "Restoregreentower"
Type = Restore
Client=greentower-fd
FileSet="greentowerFileSet"
Storage = greentowerFile
Pool = greentower
Messages = Standard
Where = /tmp/bacula-restores
}
Job {
Name = "Restoreborder"
Type = Restore
Client=border-fd
FileSet="borderFileSet"
Storage = borderFile
Pool = border
Messages = Standard
Where = /tmp/bacula-restores
}
FileSet {
Name = "Windows"
Include {
Options {
signature=MD5
compression=gzip
}
File = C:/
}
}
FileSet {
Name = "borderFileSet"
Include {
Options {
signature=MD5
compression=gzip
}
File = /usr/home
File = /root
File = /etc
File = /usr/local/etc
File = /var/mail
File = /var/log
File = /mirror
}
Exclude {
File = /mirror/bacula
}
}
FileSet {
Name = "greentowerFileSet"
Include {
Options {
signature=MD5
compression=gzip
}
File = /home
File = /root
File = /etc
File = /usr/local/etc
File = /var/log
}
}
# This is the backup of the catalog
FileSet {
Name = "Catalog"
Include {
Options {
signature = MD5
compression=gzip
}
File = /var/db/bacula/bacula.sql
}
}
Schedule {
Name = "WeeklyCycle1"
Run = Full 1st sun at 2:05
Run = Differential 2nd-5th sun at 2:05
Run = Incremental mon-sat at 2:05
}
Schedule {
Name = "WeeklyCycle2"
Run = Full 2nd sun at 2:05
Run = Differential 1st,3rd-5th sun at 2:05
Run = Incremental mon-sat at 2:05
}
Schedule {
Name = "WeeklyCycle3"
Run = Full 3rd sun at 2:05
Run = Differential 1st-2nd,4th-5th sun at 2:05
Run = Incremental mon-sat at 2:05
}
Schedule {
Name = "WeeklyCycle4"
Run = Full 4th sun at 2:05
Run = Differential 1st-3rd,5th sun at 2:05
Run = Incremental mon-sat at 2:05
}
# This schedule does the catalog. It starts after the WeeklyCycle
Schedule {
Name = "WeeklyCycleAfterBackup"
Run = Full sun-sat at 2:10
}
# Client (File Services) to backup
Client {
Name = Abigail-fd
Address = Abigail
FDPort = 9102
Catalog = MyCatalog
Password = "xxx" # password for FileDaemon
File Retention = 30 days # 30 days
Job Retention = 6 months # six months
AutoPrune = yes # Prune expired Jobs/Files
}
Client {
Name = Tani-fd
Address = Tani
FDPort = 9102
Catalog = MyCatalog
Password = "xxx" #password for FileDaemon
File Retention = 30 days # 30 days
Job Retention = 6 months # six months
AutoPrune = yes # Prune expired Jobs/Files
}
Client {
Name = greentower-fd
Address = greentower
FDPort = 9102
Catalog = MyCatalog
Password = "xxx" # password for FileDaemon
File Retention = 30 days # 30 days
Job Retention = 6 months # six months
AutoPrune = yes # Prune expired Jobs/Files
}
Client {
Name = border-fd
Address = border
FDPort = 9102
Catalog = MyCatalog
Password = "xxx" # password for FileDaemon
File Retention = 30 days # 30 days
Job Retention = 6 months # six months
AutoPrune = yes # Prune expired Jobs/Files
}
# Definition of file storage device
Storage {
Name = File
Address = border # N.B. Use a fully qualified name here
SDPort = 9103
Password = "xxx"
Device = FileStorage
Media Type = File
}
Storage {
Name = AbigailFile
Address = border # N.B. Use a fully qualified name here
SDPort = 9103
Password = "xxx"
Device = AbigailFileStorage
Media Type = File
}
Storage {
Name = TaniFile
Address = border # N.B. Use a fully qualified name here
SDPort = 9103
Password = "xxx"
Device = TaniFileStorage
Media Type = File
}
Storage {
Name = greentowerFile
Address = border # N.B. Use a fully qualified name here
SDPort = 9103
Password = "xxx"
Device = greentowerFileStorage
Media Type = File
}
Storage {
Name = borderFile
Address = border # N.B. Use a fully qualified name here
SDPort = 9103
Password = "xxx"
Device = borderFileStorage
Media Type = File
}
# Generic catalog service
Catalog {
Name = MyCatalog
dbname = bacula; user = bacula; password = ""
}
#Messages section removed for this email
Pool {
Name = Default
LabelFormat = "Catalog-"
Pool Type = Backup
Recycle = yes # Bacula can automatically recycle Volumes
AutoPrune = yes # Prune expired volumes
Volume Retention = 365 days # one year
Accept Any Volume = yes # write on any volume in the pool
}
Pool {
Name = Abigail
LabelFormat = "Abigail-"
Pool Type = Backup
Recycle = yes
AutoPrune = yes
Volume Retention = 32 days
Maximum Volume Jobs = 1
Maximum Volumes = 33
}
Pool {
Name = Tani
LabelFormat = "Tani-"
Pool Type = Backup
Recycle = yes
AutoPrune = yes
Volume Retention = 32 days
Maximum Volume Jobs = 1
Maximum Volumes = 33
}
Pool {
Name = greentower
LabelFormat = "greentower-"
Pool Type = Backup
Recycle = yes
AutoPrune = yes
Volume Retention = 32 days
Maximum Volume Jobs = 1
Maximum Volumes = 33
}
Pool {
Name = border
LabelFormat = "border-"
Pool Type = Backup
Recycle = yes
AutoPrune = yes
Volume Retention = 32 days
Maximum Volume Jobs = 1
Maximum Volumes = 33
}
bacula-sd.conf
-------------------------------------------------------------
Storage { # definition of myself
Name = border-sd
SDPort = 9103 # Director's port
WorkingDirectory = "/var/db/bacula"
Pid Directory = "/var/run"
Maximum Concurrent Jobs = 20
}
Director {
Name = border-dir
Password = "xxx"
}
Device {
Name = FileStorage
Media Type = File
Archive Device = /mirror/bacula
# also tried /mirror/bacula/default
LabelMedia = yes; # lets Bacula label unlabeled media
Random Access = Yes;
AutomaticMount = yes; # when device opened, read it
RemovableMedia = no;
AlwaysOpen = no;
}
Device {
Name = AbigailFileStorage
Archive Device = /mirror/bacula
#also tried /mirror/bacula/Abigail
Media Type = File
Always Open = no
Removable Media = no
Random Access = yes
Label Media = yes
Automatic Mount = yes
}
Device {
Name = TaniFileStorage
Archive Device = /mirror/bacula
#also tried /mirror/bacula/Tani
Media Type = File
Always Open = no
Removable Media = no
Random Access = yes
Label Media = yes
Automatic Mount = yes
}
Device {
Name = greentowerFileStorage
Archive Device = /mirror/bacula
#also tried /mirror/bacula/greentower
Media Type = File
Always Open = no
Removable Media = no
Random Access = yes
Label Media = yes
Automatic Mount = yes
}
Device {
Name = borderFileStorage
Archive Device = /mirror/bacula
#also tried /mirror/bacula/border
Media Type = File
Always Open = no
Removable Media = no
Random Access = yes
Label Media = yes
Automatic Mount = yes
}
Messages {
Name = Standard
director = border-dir = all
}
-------------------------------------------------------
This SF.Net email is sponsored by the JBoss Inc. Get Certified Today
Register for a JBoss Training Course. Free Certification Exam
for All Training Attendees Through End of 2005. For more info visit:
http://ads.osdn.com/?ad_id=7628&alloc_id=16845&op=click
_______________________________________________
Bacula-users mailing list
Bacula-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/bacula-users