Hello,

I'm running Bacula 15.0.2 on Rocky Linux 9.4. I'm setting up a system that I will eventually deploy for a customer. This system is currently in the testing phase and nothing on it matters - yet.
I have a set of jobs and resources that write to local file volumes. After the local backup jobs finish, lower-priority copy jobs run with Selection Type = PoolUncopiedJobs to copy the local jobs to Backblaze B2 S3-compatible storage. It mostly seems to work. The trouble is that I'm occasionally seeing errors like this:

20-Feb 15:56 td-bacula-sd JobId 47: Error: B2-TGU-Inc-0019/part.4 state=error retry=1/10 size=5.368 GB duration=1435s
  msg=S3_put_object ServiceUnavailable ERR=Service temporarily unavailable, please try again.
  CURL Effective URL: https://Craeon-TGU-Inc.s3.us-east-005.backblazeb2.com/B2-TGU-Inc-0019/part.4

Some parts are failing to upload. I verified with ls that the parts in question still remain on disk (I have set the cloud cache to truncate after upload, so only part.1 and parts that haven't uploaded should remain). In bconsole I ran 'cloud allpools upload' and that seemed to work, albeit slowly. It's still in progress now with the few remaining part files that didn't upload.

Upload speeds seem much slower than when the job itself was uploading part files. Maybe some sort of bandwidth limitation is in effect when the upload command is run from within bconsole? During the actual copy job, uploads observed at my pfSense router were in the 400-800 Mbps range. Now that the copy jobs have finished and only the bconsole 'cloud allpools upload' command is running, sustained upload is in the 5-10 Mbps range, and I still have a couple of 5 GB part files left. I know this system and my internet connection can do better, so why is the upload run from the console going so slowly? I have continuous pings to 8.8.8.8 and to the Backblaze node that is taking these uploads, and the ping tests haven't dropped a single packet during the upload period.

What can I do to improve reliability here? I was considering setting up an admin job to periodically run 'cloud allpools upload'. How does one set up an admin job? I've searched the manual and read the examples, but I'm fuzzy on how it would actually work. I envisioned just setting the admin job's payload to be a bconsole command (roughly like the first sketch I've pasted below, ahead of my config), but the examples in the manual didn't seem to be exactly that. Maybe I'm just confused.

The bigger problem is that I don't know how to monitor whether a part was subsequently uploaded successfully. I am working on deploying Bill A's job report script, and that's likely going to be my monitoring solution. Maybe a bad exit status for an admin job tasked with uploading any leftover part files could send up a flag that something is wrong and a part file hasn't been uploaded? The second sketch below shows what I mean.

I'm using the S3 driver instead of the Amazon driver. I read that the new Amazon driver is much better than the S3 driver, but in my testing the Amazon driver didn't seem to want to connect to a non-AWS server.

I welcome any advice or assistance.
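To show what I had in mind for the admin job, here is a rough, untested sketch. The job name, schedule name, priority, run time, and script path are all placeholders I made up, and I'm leaning on my existing JobDefs because I'm not sure which directives an Admin-type job strictly requires:

Job {
  Name = "B2-Upload-Sweep-adm"            # made-up name
  Type = Admin
  JobDefs = "Synology-Local"              # reuse existing JobDefs for Client/Fileset/Messages
  Schedule = "B2-Upload-Sweep-sched"      # made-up schedule, below
  Priority = 30                           # after the priority-20 copy jobs
  RunScript {
    RunsWhen = Before
    RunsOnClient = No                     # run on the Director host, not a client
    Command = "/opt/bacula/scripts/b2_upload_sweep.sh"   # placeholder path, script sketched below
  }
}

Schedule {
  Name = "B2-Upload-Sweep-sched"
  Run = at 05:00
}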
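And this is roughly what I pictured the wrapper script doing: pipe 'cloud allpools upload' into bconsole, then exit non-zero if anything other than part.1 is still sitting in a cache directory, on the assumption that a failing Before script marks the admin job in error. The bconsole path and cache directories are guesses based on my own layout, and the whole thing is untested:

#!/bin/sh
# Hypothetical sweep script for the admin job above (untested sketch).
# Re-runs the cloud upload, then exits non-zero if any part other than
# part.1 is still sitting in a cloud cache directory.

BCONSOLE=/opt/bacula/bin/bconsole          # adjust to wherever bconsole lives
CACHE_DIRS="/mnt/synology/bacula/B2-TGU-Full /mnt/synology/bacula/B2-TGU-Inc /mnt/synology/bacula/B2-TGU-Diff"

# Ask the SD (via the Director) to push anything still in the cache.
echo "cloud allpools upload" | "$BCONSOLE"

# With TruncateCache = AfterUpload, only part.1 should remain on disk.
leftover=$(find $CACHE_DIRS -type f -name 'part.*' ! -name 'part.1' 2>/dev/null)

if [ -n "$leftover" ]; then
    echo "Cloud parts still not uploaded:"
    echo "$leftover"
    exit 1
fi

exit 0

Again, completely untested; I'm mainly trying to confirm whether this is the intended pattern for an admin job.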
dir config:

Job {
  Name = "Backup-akita-job"
  Level = "Incremental"
  Client = "akita-fd"
  Fileset = "Windows-All-Drives-fs"
  Storage = "Synology-Local"
  Pool = "Synology-Local-Inc"
  JobDefs = "Synology-Local"
}

Job {
  Name = "Copy-Control-job"
  Type = Copy
  Level = Full
  Client = td-bacula-fd
  Schedule = "Copy-End-Of-Day"
  FileSet = "None"
  Messages = Standard
  Pool = None
  Storage = "Synology-Local"
  Maximum Concurrent Jobs = 4
  Selection Type = PoolUncopiedJobs
  Priority = 20
  JobDefs = "Synology-Local"
}

Schedule {
  Name = "Biannual-Cycle"
  Run = Level="Full" jan,jul 1st sun at 23:05
  Run = Level="Differential" feb,mar,apr,may,jun,aug,sep,oct,nov,dec 1st sun at 23:05
  Run = Level="Incremental" sun,mon,tue,wed,thu,fri,sat at 23:05
}

Schedule {
  Name = "Copy-End-Of-Day"
  Run = Pool="Synology-Local-Full" at 23:50
  Run = Pool="Synology-Local-Diff" at 23:51
  Run = Pool="Synology-Local-Inc" at 23:52
}

Storage {
  Name = "B2-TGU-Full"
  Description = "B2-TGU-Full"
  SdPort = 9103
  Address = "td-bacula"
  Password = "7D9QQiqfzdG3vkjZ4RclYVtWdfKDTfUrruHi2ULTcx9B"
  Device = "B2-TGU-Full"
  MediaType = "B2-TGU-Full"
  Autochanger = "B2-TGU-Full"
  MaximumConcurrentJobs = 50
}

Pool {
  # cloud full pool
  Name = "B2-TGU-Full"
  Description = "B2-TGU-Full"
  PoolType = "Backup"
  LabelFormat = "B2-TGU-Full-"
  LabelType = "Bacula"
  MaximumVolumeJobs = 1
  FileRetention = 18months
  JobRetention = 18months
  VolumeRetention = 18months
  Storage = "B2-TGU-Full"
  CacheRetention = 1minute
  ActionOnPurge = Truncate
}

FileSet {
  Name = "Windows-All-Drives-fs"
  Include {
    Options {
      Exclude = "yes"
      IgnoreCase = "yes"
      # wilddir = "*:/Windows"
      wildfile = "*:/pagefile.sys"
      wildfile = "*:/hiberfil.sys"
      wildfile = "*:/swapfile.sys"
      # wild = "*:/$Recycle.Bin"
      # wild = "*:/$recycler"
      # wild = "*:/Program Files (x86)"
      # wild = "*:/Program Files"
      # wild = "*:/recycler/"
      # wild = "*:/users/all users/microsoft help/"
      # wild = "*:/users/all users/microsoft/"
      # wild = "*:/i386/"
      # wild = "*:/msocache/"
    }
    Options {
      #compression=GZIP
      #compression=LZO
      compression = ZSTD
      IgnoreCase = "yes"
      Signature = "SHA512"
      OneFS = "no"
      DriveType = fixed, removable
      verify = pins3
    }
    File = /
  }
}

JobDefs {
  Name = "Synology-Local"
  Type = "Backup"
  Level = "Incremental"
  Messages = "Standard"
  AllowDuplicateJobs = no
  # We don't want duplicate jobs. What action is taken is determined by the directives below.
  # See flowchart Figure 23.2 in the Bacula 15.x Main manual, probably page 245 in the PDF.
  CancelLowerLevelDuplicates = yes
  # If a lower level job (example: inc) is running or queued and a higher level job (example: diff or full)
  # is added to the queue, then the lower level job will be cancelled.
  CancelQueuedDuplicates = yes
  # This will cancel any queued duplicate jobs.
  Pool = "Synology-Local-Inc"
  FullBackupPool = "Synology-Local-Full"
  IncrementalBackupPool = "Synology-Local-Inc"
  DifferentialBackupPool = "Synology-Local-Diff"
  Client = "td-bacula-fd"
  Fileset = "Windows-All-Drives-fs"
  Schedule = "Biannual-Cycle"
  WriteBootstrap = "/mnt/synology/bacula/BSR/%n.bsr"
  MaxFullInterval = 180days
  MaxDiffInterval = 90days
  SpoolAttributes = yes
  Priority = 10
  ReRunFailedLevels = yes
  # (If the previous full or diff failed, the current job will be upgraded to match the failed job's level.)
  # A failed job is defined as one that has not terminated normally, which includes any running job of the
  # same name. Cannot allow duplicate queued jobs.
  RescheduleOnError = no
  Accurate = yes
}

sd config:

Device {
  Name = "B2-TGU-Full-001"    # there are 5 more like this one
  Description = "B2-TGU-Full"
  MediaType = "B2-TGU-Full"
  DeviceType = "Cloud"
  ArchiveDevice = "/mnt/synology/bacula/B2-TGU-Full"
  RemovableMedia = no
  RandomAccess = yes
  AutomaticMount = yes
  LabelMedia = yes
  Autochanger = yes
  ReadOnly = no
  MaximumFileSize = 5368709120
  MaximumConcurrentJobs = 1
  DriveIndex = 0
  MaximumPartSize = 5368709120
  Cloud = "B2-TGU-Full"
}

Autochanger {
  Name = "B2-TGU-Full"
  Device = "B2-TGU-Full-001"
  Device = "B2-TGU-Full-002"
  Device = "B2-TGU-Full-003"
  Device = "B2-TGU-Full-004"
  Device = "B2-TGU-Full-005"
  Device = "B2-TGU-Full-006"
  ChangerDevice = "/dev/null"
  ChangerCommand = "/dev/null"
}

Cloud {
  Name = "B2-TGU-Full"
  Driver = "S3"
  HostName = "s3.us-east-005.backblazeb2.com"
  BucketName = "Craeon-TGU-Full"
  AccessKey = "REDACTED"
  SecretKey = "REDACTED"
  Protocol = "HTTPS"
  UriStyle = "VirtualHost"
  TruncateCache = "AfterUpload"
  Upload = "EachPart"
}

Regards,

Robert Gerber
402-237-8692
r...@craeon.net