> It'd be really nice if Bacula could store this information in a more > query-friendly way.
Just for kicks, I've written an extension to PostgreSQL that lets you decode the lstat field into a proper `stat' structure you can query, build indexes on, etc. It's basically just a PostgreSQL function interface wrapper around the base64 and stat structure decoding routines from the Bacula sources. If you're not using PostgreSQL ... well, I guess it could probably be ported to MySQL's function interface, though you'd be on your own doing so. If you use SQLite (or, IMO, MySQL) .... time to upgrade? I've attached the source code, which should build with a simple "make; make install" on any reasonably sensible UNIX with a gcc compiler. I didn't use Pg's extension build system since it was so easy to just write a Makefile, but you could probably adapt it to it pretty easily. Example of use: bacula=# select decode_stat(lstat) from file limit 10; decode_stat ------------------------------------------------------------------------------------------ (64782,49578,33152,1,105,8,0,8597,24,1231311060,1231311060,1233542770,-1076656616,0) (64782,-1201252948,33152,1,105,8,0,22845,48,1188919531,1188919531,1233542525,16777216,0) (64782,-1201252948,33152,1,105,8,0,35196,72,1197340795,1197340795,1233542530,30836799,0) (64782,-1201252948,33152,1,105,8,0,3383,8,1214876142,1214876142,1233542823,30836799,0) (64782,-1201252948,33152,1,105,8,0,3004,8,1190704535,1190704535,1233542394,30836799,0) (64782,-1201252948,33152,1,105,8,0,1428,8,1170831227,1170831227,1233542575,30836799,0) (64782,-1201252948,33152,1,105,8,0,12073,24,1192006848,1192006848,1233542442,30836799,0) (64782,-1201252948,33152,1,105,8,0,38046,80,1149858815,1115606509,1233542603,30836799,0) (64782,-1201252948,33152,1,105,8,0,6511,16,1149858788,1092056820,1233542500,30836799,0) (64782,-1201252948,33152,1,105,8,0,2243,8,1149858835,1131671960,1233542505,30836799,0) (10 rows) The data structure the function returns: bacula=# \d stat Composite type "public.stat" Column | Type ------------+--------- st_dev | integer st_ino | 
integer st_mod | integer st_nlink | integer st_uid | integer st_gid | integer st_rdev | bigint st_size | integer st_blksize | integer st_blocks | integer st_atime | integer st_mtime | integer st_ctime | integer linkfi | integer Files and sizes: bacula=# SELECT filename.name, x.st_size FROM (select file.*, (decode_stat(lstat)).st_size FROM file LIMIT 10) AS x INNER JOIN filename ON x.filenameid = filename.filenameid; name | st_size -------+--------- 8917. | 59627 9554. | 147590 5750. | 747 6141. | 100195 6209. | 14146 6169. | 5729 272. | 1981 1303. | 1752 9151. | 970 6487. | 448236 (10 rows) Making an index on file size, then selecting the names and sizes of the biggest 20 files: CREATE OR REPLACE FUNCTION decode_lstat_size(text) RETURNS int4 AS $$ SELECT (decode_stat($1)).st_size; $$ LANGUAGE SQL IMMUTABLE STRICT; CREATE INDEX file_size ON file (decode_lstat_size(lstat)); SELECT filename.name, decode_lstat_size(lstat) FROM file INNER JOIN filename ON file.filenameid = filename.filenameid ORDER BY decode_lstat_size(lstat) DESC LIMIT 20; name | decode_lstat_size --------------------------------+------------------- class40.img.Bin | 2097152768 ponserverxp.img | 2097152000 ponserverxp.img | 2097152000 ponserverxp.img | 2097152000 ponserverxp.img | 2097152000 ponserverxp.img | 2097152000 Post_Studio_SOE_Aug_2007.dmg | 2053557781 alder-000002.vmdk | 1988952064 alder-000002.vmdk | 1988952064 profiles.star | 1729841152 GarageBandExtraContent.tar | 1199718400 GarageBandExtraContent.tar | 1198376960 xcode_2.4.1_8m1910_6936315.dmg | 968079156 INBOX | 949566630 old photos.psd | 928456141 old photos.psd | 928456141 old photos.psd | 928456141 old photos.psd | 928456141 old photos.psd | 928456141 old photos.psd | 928456141 (20 rows) Time: 1.617 ms -- Craig Ringer
/*
   Bacula® - The Network Backup Solution

   Copyright (C) 2000-2007 Free Software Foundation Europe e.V.

   The main author of Bacula is Kern Sibbald, with contributions from
   many others, a complete list can be found in the file AUTHORS.
   This program is Free Software; you can redistribute it and/or
   modify it under the terms of version two of the GNU General Public
   License as published by the Free Software Foundation and included
   in the file LICENSE.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   Bacula® is a registered trademark of Kern Sibbald.
   The licensor of Bacula is the Free Software Foundation Europe
   (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
   Switzerland, email:f...@fsfeurope.org.
*/
/*
 *   Generic base 64 input and output routines
 *
 *    Written by Kern E. Sibbald, March MM.
 *
 *   Version $Id: base64.c 8495 2009-02-28 14:52:14Z marcovw $
 */

#ifdef TEST_MODE
#include <glob.h>
#endif
#if defined(TEST_MODE) || defined(BIN_TEST)
#include <stdio.h>               /* printf() used by the test drivers only */
#endif

#include <inttypes.h>
#include <cstring>

/* The 64 output digits, indexed by 6-bit value */
static uint8_t const base64_digits[64] =
{
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
  'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
};

static int base64_inited = 0;

/*
 * Reverse lookup table: input byte -> 6-bit value.
 * It is indexed with a uint8_t, so it needs one slot for every possible
 * byte value; a smaller table would be read out of bounds for any input
 * byte >= 0x80. Bytes that are not base64 digits map to 0.
 */
static uint8_t base64_map[256];


/* Initialize the Base 64 conversion routines (fills base64_map) */
void
base64_init(void)
{
   int i;
   memset(base64_map, 0, sizeof(base64_map));
   for (i=0; i<64; i++)
      base64_map[(uint8_t)base64_digits[i]] = i;
   base64_inited = 1;
}

/*
 * Convert a value to base64 characters.
 *
 *  value   number to encode; a leading '-' is emitted for negatives
 *  where   output buffer. It must hold an optional sign, one digit per
 *          6 bits of the magnitude and the terminating EOS, i.e. up to
 *          13 bytes when intmax_t is 64 bits wide.
 *
 * Returns the number of characters
 * stored (not including the EOS).
 */
int
to_base64(intmax_t value, char *where)
{
   uintmax_t magnitude;
   uintmax_t val;
   int i = 0;
   int n;

   /*
    * Handle negative values. The negation is done in unsigned
    * arithmetic so that INTMAX_MIN does not overflow.
    */
   if (value < 0) {
      where[i++] = '-';
      magnitude = (uintmax_t)0 - (uintmax_t)value;
   } else {
      magnitude = (uintmax_t)value;
   }

   /* Determine output size */
   val = magnitude;
   do {
      val >>= 6;
      i++;
   } while (val);
   n = i;

   /* Output characters, least significant digit first, right to left */
   val = magnitude;
   where[i] = 0;
   do {
      where[--i] = base64_digits[val & (uintmax_t)0x3F];
      val >>= 6;
   } while (val);
   return n;
}

/*
 * Convert the Base 64 characters in where to
 * a value. No checking is done on the validity
 * of the characters!!  Parsing stops at the first
 * EOS or space; an optional leading '-' negates
 * the result.
 *
 * Returns the number of characters consumed; the
 * decoded number is stored in *value.
 */
int
from_base64(intmax_t *value, char *where)
{
   uintmax_t val = 0;
   int i, neg;

   if (!base64_inited)
      base64_init();
   /* Check if it is negative */
   i = neg = 0;
   if (where[i] == '-') {
      i++;
      neg = 1;
   }
   /* Construct value */
   while (where[i] != 0 && where[i] != ' ') {
      val <<= 6;
      val += base64_map[(uint8_t)where[i++]];
   }

   *value = neg ? -(intmax_t)val : (intmax_t)val;
   return i;
}


/*
 * Encode binary data in bin of len bytes into
 * buf as base64 characters.
 *
 *  buf         output buffer
 *  buflen      size of buf in bytes, including room for the EOS;
 *              output is truncated to fit. Nothing is written when
 *              buflen is not positive.
 *  bin/binlen  input bytes and their count
 *  compatible  if true, the bin_to_base64 routine will be compatible
 *              with what the rest of the world uses.
 *
 *  Returns: the number of characters stored not
 *           including the EOS
 */
int
bin_to_base64(char *buf, int buflen, char *bin, int binlen, int compatible)
{
   uint32_t reg, save, mask;
   int rem, i;
   int j = 0;

   /* Without room for even the EOS there is nothing we may write */
   if (buflen <= 0) {
      return 0;
   }

   reg = 0;
   rem = 0;
   buflen--;                       /* allow for storing EOS */
   for (i=0; i < binlen; ) {
      if (rem < 6) {
         reg <<= 8;
         if (compatible) {
            reg |= (uint8_t)bin[i++];
         } else {
            /*
             * Sign-extending cast, kept as is.
             * NOTE(review): presumably this preserves the encoding of
             * data written by earlier versions -- confirm before
             * touching it.
             */
            reg |= (int8_t)bin[i++];
         }
         rem += 8;
      }
      save = reg;
      reg >>= (rem - 6);
      if (j < buflen) {
         buf[j++] = base64_digits[reg & 0x3F];
      }
      reg = save;
      rem -= 6;
   }
   /* Flush the bits still pending in reg */
   if (rem && j < buflen) {
      mask = (1 << rem) - 1;
      if (compatible) {
         buf[j++] = base64_digits[(reg & mask) << (6 - rem)];
      } else {
         buf[j++] = base64_digits[reg & mask];
      }
   }
   buf[j] = 0;
   return j;
}

#ifdef BIN_TEST
/* Small driver: encodes 16 bytes counting down from 0xFF and prints them */
int main(int argc, char *argv[])
{
   int xx = 0;
   int len;
   char buf[100];
   char junk[100];
   int i;

#ifdef xxxx
   for (i=0; i < 1000; i++) {
      bin_to_base64(buf, sizeof(buf), (char *)&xx, 4, true);
      printf("xx=%s\n", buf);
      xx++;
   }
#endif
   junk[0] = 0xFF;
   for (i=1; i<100; i++) {
      junk[i] = junk[i-1]-1;
   }
   len = bin_to_base64(buf, sizeof(buf), junk, 16, true);
   printf("len=%d junk=%s\n", len, buf);
   return 0;
}
#endif

#ifdef TEST_MODE
static int errfunc(const char *epath, int eernoo)
{
  printf("in errfunc\n");
  return 1;
}


/*
 * Test the base64 routines by encoding and decoding
 * lstat() packets.
 *
 * NOTE(review): this driver uses berrno, encode_stat() and a
 * two-argument decode_stat(), none of which are defined in this file;
 * it presumably only builds inside the full Bacula source tree.
 */
int main(int argc, char *argv[])
{
   char where[500];
   int i;
   glob_t my_glob;
   char *fname;
   struct stat statp;
   struct stat statn;
   int debug_level = 0;
   char *p;
   time_t t = 1028712799;

   if (argc > 1 && strcmp(argv[1], "-v") == 0)
      debug_level++;

   base64_init();

   my_glob.gl_offs = 0;
   glob("/etc/grub.conf", GLOB_MARK, errfunc, &my_glob);

   for (i=0; my_glob.gl_pathv[i]; i++) {
      fname = my_glob.gl_pathv[i];
      if (lstat(fname, &statp) < 0) {
         berrno be;
         printf("Cannot stat %s: %s\n", fname, be.bstrerror(errno));
         continue;
      }
      encode_stat(where, &statp, 0, 0);

      printf("Encoded stat=%s\n", where);

#ifdef xxx
      p = where;
      p += to_base64((intmax_t)(statp.st_atime), p);
      *p++ = ' ';
      p += to_base64((intmax_t)t, p);
      printf("%s %s\n", fname, where);

      printf("%s %lld\n", "st_dev", (intmax_t)statp.st_dev);
      printf("%s %lld\n", "st_ino", (intmax_t)statp.st_ino);
      printf("%s %lld\n", "st_mode", (intmax_t)statp.st_mode);
      printf("%s %lld\n", "st_nlink", (intmax_t)statp.st_nlink);
      printf("%s %lld\n", "st_uid", (intmax_t)statp.st_uid);
      printf("%s %lld\n", "st_gid", (intmax_t)statp.st_gid);
      printf("%s %lld\n", "st_rdev", (intmax_t)statp.st_rdev);
      printf("%s %lld\n", "st_size", (intmax_t)statp.st_size);
      printf("%s %lld\n", "st_blksize", (intmax_t)statp.st_blksize);
      printf("%s %lld\n", "st_blocks", (intmax_t)statp.st_blocks);
      printf("%s %lld\n", "st_atime", (intmax_t)statp.st_atime);
      printf("%s %lld\n", "st_mtime", (intmax_t)statp.st_mtime);
      printf("%s %lld\n", "st_ctime", (intmax_t)statp.st_ctime);
#endif

      /* strlen() returns size_t; cast so it matches the %d conversion */
      if (debug_level)
         printf("%s: len=%d val=%s\n", fname, (int)strlen(where), where);

      decode_stat(where, &statn);

      if (statp.st_dev != statn.st_dev ||
          statp.st_ino != statn.st_ino ||
          statp.st_mode != statn.st_mode ||
          statp.st_nlink != statn.st_nlink ||
          statp.st_uid != statn.st_uid ||
          statp.st_gid != statn.st_gid ||
          statp.st_rdev != statn.st_rdev ||
          statp.st_size != statn.st_size ||
          statp.st_blksize != statn.st_blksize ||
          statp.st_blocks != statn.st_blocks ||
          statp.st_atime != statn.st_atime ||
          statp.st_mtime != statn.st_mtime ||
          statp.st_ctime != statn.st_ctime) {

         printf("%s: %s\n", fname, where);
         encode_stat(where, &statn, 0, 0);
         printf("%s: %s\n", fname, where);
         printf("NOT EQAL\n");
      }

   }
   globfree(&my_glob);

   printf("%d files examined\n", i);

   to_base64(UINT32_MAX, where);
   printf("UINT32_MAX=%s\n", where);

   return 0;
}
#endif
/*
 *   Generic base 64 input and output routines
 *
 *    Written by Kern E. Sibbald, March MM.
 *
 *   Version $Id: base64.h 7380 2008-07-14 10:42:59Z kerns $
 */
/*
   Bacula® - The Network Backup Solution

   Copyright (C) 2000-2006 Free Software Foundation Europe e.V.

   The main author of Bacula is Kern Sibbald, with contributions from
   many others, a complete list can be found in the file AUTHORS.
   This program is Free Software; you can redistribute it and/or
   modify it under the terms of version two of the GNU General Public
   License as published by the Free Software Foundation and included
   in the file LICENSE.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   Bacula® is a registered trademark of Kern Sibbald.
   The licensor of Bacula is the Free Software Foundation Europe
   (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
   Switzerland, email:f...@fsfeurope.org.
*/

#ifndef BACULA_BASE64_H
#define BACULA_BASE64_H

/* intmax_t is used in the prototypes below */
#include <inttypes.h>

/*
 * Maximum size of len bytes after base64 encoding (including the EOS).
 * The argument is parenthesised so that expressions such as
 * BASE64_SIZE(a + b) expand correctly.
 */
#define BASE64_SIZE(len) ((4 * (len) + 2) / 3 + 1)

/* Fill the decoding table; from_base64() does this on first use */
void base64_init(void);

/* Encode value into where; returns characters stored, excluding the EOS */
int to_base64(intmax_t value, char *where);

/* Decode digits up to EOS or space into *value; returns characters consumed */
int from_base64(intmax_t *value, char *where);

/* Encode binlen bytes of bin into buf (at most buflen bytes, EOS included) */
int bin_to_base64(char *buf, int buflen, char *bin, int binlen,
                  int compatible);

#endif /* BACULA_BASE64_H */
#include <inttypes.h> #include "base64.h" extern "C" { #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> } /* Do casting according to unknown type to keep compiler happy */ #ifdef HAVE_TYPEOF #define plug(st, val) st = (typeof st)val #else #if !HAVE_GCC & HAVE_SUN_OS /* Sun compiler does not handle templates correctly */ #define plug(st, val) st = val #elif __sgi #define plug(st, val) st = val #else /* Use templates to do the casting */ template <class T> void plug(T &st, uint64_t val) { st = static_cast<T>(val); } #endif #endif extern "C" int decode_stat(char *buf, struct stat *statp, int32_t *LinkFI); /* Decode a stat packet from base64 characters */ int decode_stat(char *buf, struct stat *statp, int32_t *LinkFI) { char *p = buf; int64_t val; p += from_base64(&val, p); plug(statp->st_dev, val); p++; p += from_base64(&val, p); plug(statp->st_ino, val); p++; p += from_base64(&val, p); plug(statp->st_mode, val); p++; p += from_base64(&val, p); plug(statp->st_nlink, val); p++; p += from_base64(&val, p); plug(statp->st_uid, val); p++; p += from_base64(&val, p); plug(statp->st_gid, val); p++; p += from_base64(&val, p); plug(statp->st_rdev, val); p++; p += from_base64(&val, p); plug(statp->st_size, val); p++; #ifndef HAVE_MINGW p += from_base64(&val, p); plug(statp->st_blksize, val); p++; p += from_base64(&val, p); plug(statp->st_blocks, val); p++; #else p += from_base64(&val, p); // plug(statp->st_blksize, val); p++; p += from_base64(&val, p); // plug(statp->st_blocks, val); p++; #endif p += from_base64(&val, p); plug(statp->st_atime, val); p++; p += from_base64(&val, p); plug(statp->st_mtime, val); p++; p += from_base64(&val, p); plug(statp->st_ctime, val); /* Optional FileIndex of hard linked file data */ if (*p == ' ' || (*p != 0 && *(p+1) == ' ')) { p++; p += from_base64(&val, p); *LinkFI = (uint32_t)val; } else { *LinkFI = 0; return 0; } /* FreeBSD user flags */ if (*p == ' ' || (*p != 0 && *(p+1) == ' ')) { p++; p += from_base64(&val, p); #ifdef 
HAVE_CHFLAGS plug(statp->st_flags, val); } else { statp->st_flags = 0; #endif } /* Look for data stream id */ if (*p == ' ' || (*p != 0 && *(p+1) == ' ')) { p++; p += from_base64(&val, p); } else { val = 0; } return (int)val; }
-- Install script for the decode_stat() PostgreSQL extension function.
--
-- The function is declared below as taking a single text argument, so
-- that is the signature that has to be dropped on re-installation.
DROP FUNCTION IF EXISTS decode_stat(text);
DROP TYPE IF EXISTS stat CASCADE;

-- Row type returned by decode_stat(). The C function fills the columns
-- by position, so the order here must match the order it uses.
CREATE TYPE stat AS (
    st_dev     int4,
    st_ino     int4,
    st_mod     int4,
    st_nlink   int4,
    st_uid     int4,
    st_gid     int4,
    st_rdev    int8,
    st_size    int4,
    st_blksize int4,
    st_blocks  int4,
    st_atime   int4,
    st_mtime   int4,
    st_ctime   int4,
    -- st_flags int4, -- Unused
    LinkFI     int4
);

-- STRICT: a NULL argument yields NULL without calling the C code.
-- IMMUTABLE: allows the function to be used in expression indexes.
CREATE OR REPLACE FUNCTION decode_stat(text)
RETURNS stat
AS 'decode_stat.so', 'bacula_decode_stat'
LANGUAGE C STRICT IMMUTABLE;
# Builds decode_stat.so, the shared library that provides the
# decode_stat() PostgreSQL function, and installs it into the
# server's package library directory.
#
# The tools can be overridden on the command line, e.g.
#   make PG_CONFIG=/usr/local/pgsql/bin/pg_config

CC        = gcc
CXX       = g++
PG_CONFIG = pg_config

# These targets do not name files
.PHONY: default clean install

default: decode_stat.so

base64.o: Makefile base64.c base64.h
	$(CXX) -g -c -fpic -fno-exceptions -o base64.o base64.c

decode_stat.o: Makefile decode_stat.c base64.h
	$(CXX) -g -c -fpic -fno-exceptions -o decode_stat.o decode_stat.c

pgfunc.o: Makefile pgfunc.c
	$(CC) -g -c -fpic -o pgfunc.o pgfunc.c -I `$(PG_CONFIG) --includedir-server`

decode_stat.so: Makefile decode_stat.o pgfunc.o base64.o
	$(CXX) -fpic -shared -o decode_stat.so base64.o pgfunc.o decode_stat.o

# -f: do not fail when there is nothing to remove
clean:
	rm -f *.o *.so

install: decode_stat.so
	cp decode_stat.so `$(PG_CONFIG) --pkglibdir`/decode_stat.so
#include "postgres.h"

#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

#include "fmgr.h"
#include "executor/executor.h"
#include "funcapi.h"

int decode_stat(char *buf, struct stat *statp, int32_t *LinkFI);

#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif

/* Number of columns bacula_decode_stat() fills in its result tuple */
#define BDS_STAT_NATTS 14

/*
 * text_to_cstring
 * (from proposed Pg patches)
 *
 * Create a palloc'd, null-terminated C string from a text value.  We support
 * being passed a compressed or toasted text value.  This is a bit bogus since
 * such values shouldn't really be referred to as "text *", but it seems useful
 * for robustness.  If we didn't handle that case here, we'd need another routine
 * that did, anyway.
 */
static char *
bds_text_to_cstring (const text * t)
{
  char *result;
  text *tunpacked = pg_detoast_datum_packed ((struct varlena *) t);
  int len = VARSIZE_ANY_EXHDR (tunpacked);

  result = (char *) palloc (len + 1);
  memcpy (result, VARDATA_ANY (tunpacked), len);
  result[len] = '\0';

  /* Detoasting may have produced a copy; release it if so */
  if (tunpacked != t)
    pfree (tunpacked);

  return result;
}

PG_FUNCTION_INFO_V1(bacula_decode_stat);

/*
 * bacula_decode_stat(text) -> composite
 *
 * Decodes the stat packet passed as the first argument with decode_stat()
 * and returns its fields as a tuple of BDS_STAT_NATTS columns, in the order
 * st_dev, st_ino, st_mode, st_nlink, st_uid, st_gid, st_rdev, st_size,
 * st_blksize, st_blocks, st_atime, st_mtime, st_ctime, LinkFI.
 *
 * Raises an error when called where a composite result cannot be returned,
 * or when the declared result type does not have BDS_STAT_NATTS columns.
 */
Datum
bacula_decode_stat(PG_FUNCTION_ARGS)
{
    /* Result tuple storage and interim values */
    TupleDesc tupledesc;
    Datum     values[BDS_STAT_NATTS];
    bool      nulls[BDS_STAT_NATTS];
    HeapTuple heaptuple;

    /* Text to parse */
    text *t;
    char *c;

    /* Storage for result from decode_stat(...) */
    int32_t     LinkFI = 0;
    struct stat st;

    /* Set up to return a tuple, and complain if we're in a context where we can't */
    if( get_call_result_type( fcinfo, NULL, &tupledesc ) != TYPEFUNC_COMPOSITE )
        ereport( ERROR,
                 ( errcode( ERRCODE_FEATURE_NOT_SUPPORTED ),
                   errmsg( "function returning record called in context "
                           "that cannot accept type record" )));

    /*
     * heap_form_tuple() reads one entry of values[] and nulls[] per column
     * of the result type, so a type with a different column count would make
     * it read entries we never filled in (or beyond the arrays).
     */
    if( tupledesc->natts != BDS_STAT_NATTS )
        ereport( ERROR,
                 ( errcode( ERRCODE_DATATYPE_MISMATCH ),
                   errmsg( "decode_stat result type has %d columns, expected %d",
                           tupledesc->natts, BDS_STAT_NATTS )));

    /* obtain a palloc'd, zero-terminated copy of the text we'll work on. */
    t = PG_GETARG_TEXT_P(0);
    c = bds_text_to_cstring(t);
    PG_FREE_IF_COPY(t, 0);      /* If `t' was palloc'd, free it */

    /*
     * Start from an all-zero struct so that every field read below has a
     * defined value even if the decoder leaves some of them untouched.
     */
    memset( &st, 0, sizeof( st ) );

    /* Use Bacula's code to decode the stat string */
    decode_stat(c, &st, &LinkFI);
    pfree(c);

    /*
     * Copy the stat data into a Datum array we can feed to Pg's tuple
     * creation code.
     *
     * NOTE: Int32GetDatum() keeps only 32 bits, so fields holding wider
     * values (for example st_size of a file larger than 2 GiB) come out
     * truncated. Widening a column requires changing both the Datum
     * conversion here and the declared column type, together.
     */
    values[0]  = Int32GetDatum( st.st_dev );
    values[1]  = Int32GetDatum( st.st_ino );
    values[2]  = Int32GetDatum( st.st_mode );
    values[3]  = Int32GetDatum( st.st_nlink );
    values[4]  = Int32GetDatum( st.st_uid );
    values[5]  = Int32GetDatum( st.st_gid );
    values[6]  = Int64GetDatum( st.st_rdev );
    values[7]  = Int32GetDatum( st.st_size );
    values[8]  = Int32GetDatum( st.st_blksize );
    values[9]  = Int32GetDatum( st.st_blocks );
    values[10] = Int32GetDatum( st.st_atime );
    values[11] = Int32GetDatum( st.st_mtime );
    values[12] = Int32GetDatum( st.st_ctime );
    /* values[13] = Int32GetDatum( st.st_flags ); */ /* fbsd only */
    values[13] = Int32GetDatum( LinkFI );

    /* No column of the result is ever NULL */
    memset( nulls, 0, sizeof( nulls ) );

    /* Create the return tuple with the datum array */
    BlessTupleDesc( tupledesc );
    heaptuple = heap_form_tuple( tupledesc, values, nulls );

    PG_RETURN_DATUM( HeapTupleGetDatum( heaptuple ) );
}
------------------------------------------------------------------------------
_______________________________________________ Bacula-users mailing list Bacula-users@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/bacula-users