Hello, here's my take on masking data when using pg_dump
The main idea is using PostgreSQL functions to replace data during a SELECT.
When table data is dumped SELECT a,b,c,d ... from ... query is generated, the
columns that are marked for masking are replaced with result of functions on
those columns
Example: columns name, count are to be masked, so the query will look as such:
SELECT id, mask_text(name), mask_int(count), date from ...
So about the interface: I added 2 more command-line options:
--mask-columns, which specifies what columns from what tables will be masked
usage example:
--mask-columns "t1.name, t2.description" - both columns will be
masked with the same corresponding function
or --mask-columns name - ALL columns with name "name" from all
dumped tables will be masked with correspoding function
--mask-function, which specifies what functions will mask data
usage example:
--mask-function mask_int - corresponding columns will be masked
with function named "mask_int" from default schema (public)
or --mask-function my_schema.mask_varchar - same as above but with
specified schema where the function is stored
or --mask-function somedir/filename - the function is "defined"
here - more on the structure below
Structure of the file with function description:
First row - function name (with or without schema name)
Second row - type of in and out value (the design is to only work with same
input/output type so no int-to-text shenanigans)
Third row - language of function
Forth and later rows - body of a function
Example of such file:
mask_text
text
plpgsql
res := '***';
First iteration of using file-described functions used just plain SQL query,
but since it executed during read-write connection, some things such as writing
"DROP TABLE t1;" after the CREATE FUNCTION ...; were possible.
Now even if something harmful is written in function body, it will be executed
during dump-read-only connection, where it will just throw an error
About "corresponding columns and functions" - masking functions and columns are
paired with eachother based on the input order, but --masking-columns and
--masking-functions don't have to be subsequent.
Example: pg_dump -t table_name --mask-columns name --mask-colums count
--mask-function mask_text --mask-function mask_int - here 'name' will be paired
with function 'mask_text' and 'count' with 'mask_int'
Patch includes regression tests
I'm open to discussion of this patch
Best regards,
Oleg Tselebrovskiy
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index bd9b066e4e..457290064d 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -97,6 +97,14 @@ typedef enum OidOptions
zeroAsNone = 4
} OidOptions;
+typedef struct
+{
+ char* column; /* name of masked column */
+ char* table; /* name of table where masked column is stored */
+ char* func; /* name of masking function */
+ char* schema; /* name of schema where masking function is stored */
+} MaskColumnInfo;
+
/* global decls */
static bool dosync = true; /* Issue fsync() to make dump durable on disk. */
@@ -105,6 +113,14 @@ static Oid g_last_builtin_oid; /* value of the last builtin oid */
/* The specified names/patterns should to match at least one entity */
static int strict_names = 0;
+/*
+* mask_column_info_list contains info about every to-be-masked column:
+* its name, a name its table (if nothing is specified - mask all columns with this name),
+* name of masking function and name of schema containing this function (public if not specified)
+*/
+
+static SimplePtrList mask_column_info_list = {NULL, NULL};
+
/*
* Object inclusion/exclusion lists
*
@@ -160,6 +176,8 @@ static int nseclabels = 0;
(obj)->dobj.name)
static void help(const char *progname);
+static void addFuncToDatabase(MaskColumnInfo* cur_mask_column_info,
+ FILE* mask_func_file, DumpOptions* dopt);
static void setup_connection(Archive *AH,
const char *dumpencoding, const char *dumpsnapshot,
char *use_role);
@@ -184,6 +202,8 @@ static void prohibit_crossdb_refs(PGconn *conn, const char *dbname,
static NamespaceInfo *findNamespace(Oid nsoid);
static void dumpTableData(Archive *fout, const TableDataInfo *tdinfo);
+static void maskColumns(TableInfo *tbinfo, char* current_column_name,
+ PQExpBuffer* q, SimpleStringList* column_names);
static void refreshMatViewData(Archive *fout, const TableDataInfo *tdinfo);
static const char *getRoleName(const char *roleoid_str);
static void collectRoleNames(Archive *fout);
@@ -342,6 +362,19 @@ main(int argc, char **argv)
int numWorkers = 1;
int compressLevel = -1;
int plainText = 0;
+
+ /* needed for masking */
+ SimpleStringList mask_columns_list = {NULL, NULL};
+ SimpleStringList mask_func_list = {NULL, NULL};
+ SimpleStringListCell *mask_func_cell;
+ SimpleStringListCell *mask_columns_cell;
+ SimplePtrListCell *mask_column_info_cell;
+ char *column_name_buffer;
+ char *table_name_buffer;
+ char *func_name_buffer;
+ char *schema_name_buffer;
+ FILE *mask_func_file;
+
ArchiveFormat archiveFormat = archUnknown;
ArchiveMode archiveMode;
@@ -413,6 +446,8 @@ main(int argc, char **argv)
{"on-conflict-do-nothing", no_argument, &dopt.do_nothing, 1},
{"rows-per-insert", required_argument, NULL, 10},
{"include-foreign-data", required_argument, NULL, 11},
+ {"mask-columns", required_argument, NULL, 12},
+ {"mask-function", required_argument, NULL, 13},
{NULL, 0, NULL, 0}
};
@@ -623,6 +658,14 @@ main(int argc, char **argv)
optarg);
break;
+ case 12: /* columns for masking */
+ simple_string_list_append(&mask_columns_list, optarg);
+ break;
+
+ case 13: /* function for masking - can be SQL function from .sql file,
+ declared in CLI or declared in DB*/
+ simple_string_list_append(&mask_func_list, optarg);
+ break;
default:
/* getopt_long already emitted a complaint */
pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -658,6 +701,101 @@ main(int argc, char **argv)
if (dopt.binary_upgrade)
dopt.sequence_data = 1;
+ /*
+ * Add all columns and funcions to list of MaskColumnInfo structures,
+ */
+
+ mask_func_cell = mask_func_list.head;
+ mask_columns_cell = mask_columns_list.head;
+
+ while (mask_columns_cell && mask_func_cell)
+ {
+ char* func = mask_func_cell->val;
+ char* column = strtok(mask_columns_cell->val, " ,\'\"");
+ char* table = (char*) pg_malloc(256 * sizeof(char));
+ char* schema = (char*) pg_malloc(256 * sizeof(char));
+ while (column != NULL)
+ {
+ MaskColumnInfo* new_mask_column = (MaskColumnInfo*) pg_malloc(sizeof(MaskColumnInfo));
+ new_mask_column->column = column;
+ new_mask_column->table = table;
+ new_mask_column->func = func;
+ new_mask_column->schema = schema;
+ simple_ptr_list_append(&mask_column_info_list, new_mask_column);
+ table = (char*) pg_malloc(256 * sizeof(char));
+ column = strtok(NULL, " ,\'\"");
+ }
+ mask_columns_cell = mask_columns_cell->next;
+ mask_func_cell = mask_func_cell->next;
+ }
+
+ /*
+ * If there is not enough params of one type throw error
+ */
+
+ if (mask_columns_cell != NULL || mask_func_cell != NULL)
+ pg_fatal("amount of --mask-columns and --mask-function doesn't match");
+
+ /*
+ * Extract tablenames from list of columns - done here so that strtok isn't
+ * disturbed in previous cycle
+ */
+
+ mask_column_info_cell = mask_column_info_list.head;
+
+ while (mask_column_info_cell != NULL)
+ {
+ MaskColumnInfo* cur_mask_column_info = (MaskColumnInfo*) mask_column_info_cell->ptr;
+ table_name_buffer = strtok(cur_mask_column_info->column, ".");
+ column_name_buffer = strtok(NULL, ".");
+ if (column_name_buffer == NULL) /* found column without tablename */
+ {
+ strcpy(cur_mask_column_info->table, "");
+ strcpy(cur_mask_column_info->column, table_name_buffer);
+ }
+ else
+ {
+ strcpy(cur_mask_column_info->table, table_name_buffer);
+ strcpy(cur_mask_column_info->column, column_name_buffer);
+ }
+ mask_column_info_cell = mask_column_info_cell->next;
+ }
+
+ /*
+ * Check if --mask-function is a name of function or a filepath
+ * if filepath - open file, start connection, execute script, close connection
+ */
+
+ mask_column_info_cell = mask_column_info_list.head;
+
+ while (mask_column_info_cell != NULL)
+ {
+ MaskColumnInfo* cur_mask_column_info = (MaskColumnInfo*) mask_column_info_cell->ptr;
+ func_name_buffer = pg_strdup(cur_mask_column_info->func);
+ canonicalize_path(func_name_buffer);
+ mask_func_file = fopen(func_name_buffer, "r");
+ if (mask_func_file != NULL) /* then it is a file with function*/
+ {
+ addFuncToDatabase(cur_mask_column_info, mask_func_file, &dopt);
+ }
+ else /* function stored in DB*/
+ {
+ schema_name_buffer = strtok(cur_mask_column_info->func, ".");
+ func_name_buffer = strtok(NULL, ".");
+ if (func_name_buffer == NULL) /* found function without schemaname */
+ {
+ strcpy(cur_mask_column_info->schema, "public");
+ strcpy(cur_mask_column_info->func, schema_name_buffer);
+ }
+ else
+ {
+ strcpy(cur_mask_column_info->schema, schema_name_buffer);
+ strcpy(cur_mask_column_info->func, func_name_buffer);
+ }
+ }
+ mask_column_info_cell = mask_column_info_cell->next;
+ }
+
if (dopt.dataOnly && dopt.schemaOnly)
pg_fatal("options -s/--schema-only and -a/--data-only cannot be used together");
@@ -1034,6 +1172,11 @@ help(const char *progname)
" servers matching PATTERN\n"));
printf(_(" --inserts dump data as INSERT commands, rather than COPY\n"));
printf(_(" --load-via-partition-root load partitions via the root table\n"));
+ printf(_(" --mask-columns names of columns that will be masked \n"
+ " if table name is not specified, mask in all tables\n"));
+ printf(_(" --mask-function name of function that will mask corresponding columns\n"
+ " can specify schema in which function is stored\n"
+ " can use filepath to file with function arguments\n"));
printf(_(" --no-comments do not dump comments\n"));
printf(_(" --no-publications do not dump publications\n"));
printf(_(" --no-security-labels do not dump security label assignments\n"));
@@ -1069,6 +1212,77 @@ help(const char *progname)
printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
}
+/*
+* addFuncToDatabase - parses file specified in command line, executes query from it
+* adding masking function to database
+*/
+
+//TODO - поменÑÑÑ ÑÑÑÑкÑÑÑÑ Ñайла, и ÑделаÑÑ ÑоÑÑавление запÑоÑа внÑÑÑи кода
+//ÑÑÑÑкÑÑÑа Ñайла - func_name\n argument_type\n func_language\n function_body\n
+
+static void
+addFuncToDatabase(MaskColumnInfo* cur_mask_column_info, FILE* mask_func_file, DumpOptions* dopt)
+{
+ PGconn *connection;
+ PQExpBufferData query;
+ char* conn_params = (char*) pg_malloc(256 * sizeof(char));
+ char* common_buff = (char*) pg_malloc(64 * sizeof(char));
+ char* func_name_buff = (char*) pg_malloc(64 * sizeof(char));
+ char* argument_type_buff = (char*) pg_malloc(64 * sizeof(char));
+ char* func_language_buff = (char*) pg_malloc(64 * sizeof(char));
+ char* func_body_buff = (char*) pg_malloc(512 * sizeof(char));
+ char* schema_name_buff = (char*) pg_malloc(64 * sizeof(char));
+
+ fgets(common_buff, 64, mask_func_file);
+ func_name_buff = strdup(strtok(common_buff, " ,\n\t"));
+ fgets(common_buff, 64, mask_func_file);
+ argument_type_buff = strdup(strtok(common_buff, " .,\n\t"));
+ fgets(common_buff, 64, mask_func_file);
+ func_language_buff = strdup(strtok(common_buff, " ,\n\t"));
+ free(common_buff);
+ common_buff = (char*) pg_malloc(512 * sizeof(char));
+ while(fgets(common_buff, 512, mask_func_file))
+ {
+ func_body_buff = psprintf("%s%s", func_body_buff, common_buff);
+ }
+
+ initPQExpBuffer(&query);
+ appendPQExpBuffer(&query, "CREATE OR REPLACE FUNCTION %s (IN elem %s, OUT res %s) RETURNS %s AS $BODY$ \nBEGIN\n%s\nRETURN;\nEND\n$BODY$ LANGUAGE %s;",
+ func_name_buff, argument_type_buff, argument_type_buff, argument_type_buff,
+ func_body_buff, func_language_buff);
+
+ /* Establishing connection to execute CREATE FUNCTION script */
+
+ strcpy(conn_params, "");
+ if(dopt->cparams.override_dbname)
+ conn_params = psprintf("%s dbname=%s", conn_params, dopt->cparams.override_dbname);
+ else
+ if(dopt->cparams.dbname)
+ conn_params = psprintf("%s dbname=%s", conn_params, dopt->cparams.dbname);
+ if(dopt->cparams.pghost)
+ conn_params = psprintf("%s host=%s", conn_params, dopt->cparams.pghost);
+ if(dopt->cparams.pgport)
+ conn_params = psprintf("%s port=%s", conn_params, dopt->cparams.pgport);
+ if(dopt->cparams.username)
+ conn_params = psprintf("%s user=%s", conn_params, dopt->cparams.username);
+ connection = PQconnectdb(conn_params);
+ PQexec(connection, query.data);
+ PQfinish(connection);
+ schema_name_buff = strtok(pg_strdup(func_name_buff), ".");
+ func_name_buff = strtok(NULL, ".");
+ if (func_name_buff == NULL) /* found function without schemaname */
+ {
+ strcpy(cur_mask_column_info->schema, "public");
+ strcpy(cur_mask_column_info->func, schema_name_buff);
+ }
+ else
+ {
+ strcpy(cur_mask_column_info->schema, schema_name_buff);
+ strcpy(cur_mask_column_info->func, func_name_buff);
+ }
+ free(conn_params);
+}
+
static void
setup_connection(Archive *AH, const char *dumpencoding,
const char *dumpsnapshot, char *use_role)
@@ -1977,6 +2191,7 @@ dumpTableData_copy(Archive *fout, const void *dcontext)
int ret;
char *copybuf;
const char *column_list;
+ char *temp_string = (char*)malloc(256 * sizeof(char));
pg_log_info("dumping contents of table \"%s.%s\"",
tbinfo->dobj.namespace->dobj.name, classname);
@@ -1991,20 +2206,31 @@ dumpTableData_copy(Archive *fout, const void *dcontext)
/*
* Use COPY (SELECT ...) TO when dumping a foreign table's data, and when
- * a filter condition was specified. For other cases a simple COPY
- * suffices.
+ * a filter condition was specified. OR masking of some columns is needed
+ * For other cases a simple COPY suffices.
*/
- if (tdinfo->filtercond || tbinfo->relkind == RELKIND_FOREIGN_TABLE)
+ if (tdinfo->filtercond || tbinfo->relkind == RELKIND_FOREIGN_TABLE
+ || mask_column_info_list.head)
{
appendPQExpBufferStr(q, "COPY (SELECT ");
/* klugery to get rid of parens in column list */
if (strlen(column_list) > 2)
{
- appendPQExpBufferStr(q, column_list + 1);
- q->data[q->len - 1] = ' ';
+ if (mask_column_info_list.head != NULL)
+ {
+ maskColumns(tbinfo, pg_strdup(column_list), &q, NULL);
+ appendPQExpBufferStr(q, " ");
+ }
+ else
+ {
+ appendPQExpBufferStr(q, column_list + 1);
+ q->data[q->len - 1] = ' ';
+ }
}
else
+ {
appendPQExpBufferStr(q, "* ");
+ }
appendPQExpBuffer(q, "FROM %s %s) TO stdout;",
fmtQualifiedDumpable(tbinfo),
@@ -2016,6 +2242,9 @@ dumpTableData_copy(Archive *fout, const void *dcontext)
fmtQualifiedDumpable(tbinfo),
column_list);
}
+
+ free(temp_string);
+
res = ExecuteSqlQuery(fout, q->data, PGRES_COPY_OUT);
PQclear(res);
destroyPQExpBuffer(clistBuf);
@@ -2132,6 +2361,10 @@ dumpTableData_insert(Archive *fout, const void *dcontext)
i;
int rows_per_statement = dopt->dump_inserts;
int rows_this_statement = 0;
+ /*for masking*/
+
+ SimpleStringList column_names = {NULL, NULL};
+ SimpleStringListCell *current_column;
/*
* If we're going to emit INSERTs with column names, the most efficient
@@ -2152,12 +2385,29 @@ dumpTableData_insert(Archive *fout, const void *dcontext)
if (nfields > 0)
appendPQExpBufferStr(q, ", ");
if (tbinfo->attgenerated[i])
+ {
appendPQExpBufferStr(q, "NULL");
+ simple_string_list_append(&column_names, "NULL");
+ }
else
- appendPQExpBufferStr(q, fmtId(tbinfo->attnames[i]));
+ {
+ if (mask_column_info_list.head != NULL)
+ {
+ /*taking columns that should be masked */
+ /*char* copy_column_list = pg_strdup(tbinfo->attnames[i]);
+ char* current_column_name = strtok(copy_column_list, " ,()");*/
+ maskColumns(tbinfo, tbinfo->attnames[i], &q, &column_names);
+ }
+ else
+ {
+ appendPQExpBufferStr(q, fmtId(tbinfo->attnames[i]));
+ simple_string_list_append(&column_names, fmtId(tbinfo->attnames[i]));
+ }
+ }
attgenerated[nfields] = tbinfo->attgenerated[i];
nfields++;
}
+
/* Servers before 9.4 will complain about zero-column SELECT */
if (nfields == 0)
appendPQExpBufferStr(q, "NULL");
@@ -2215,13 +2465,14 @@ dumpTableData_insert(Archive *fout, const void *dcontext)
/* append the list of column names if required */
if (dopt->column_inserts)
{
+ current_column = column_names.head;
appendPQExpBufferChar(insertStmt, '(');
for (int field = 0; field < nfields; field++)
{
if (field > 0)
appendPQExpBufferStr(insertStmt, ", ");
- appendPQExpBufferStr(insertStmt,
- fmtId(PQfname(res, field)));
+ appendPQExpBufferStr(insertStmt, current_column->val);
+ current_column = current_column->next;
}
appendPQExpBufferStr(insertStmt, ") ");
}
@@ -2373,6 +2624,58 @@ dumpTableData_insert(Archive *fout, const void *dcontext)
return 1;
}
+/*
+* maskColumns - modifies SELECT queries to mask columns that need masking
+* last argument is only for INSERT case, not used in COPY case.
+*/
+
+static void
+maskColumns(TableInfo *tbinfo, char* column_list, PQExpBuffer* q, SimpleStringList* column_names)
+{
+ char* copy_column_list = pg_strdup(column_list);
+ char* current_column_name = strtok(copy_column_list, " ,()");
+ char* masked_query = (char*)pg_malloc(sizeof(char));
+
+ while (current_column_name != NULL)
+ {
+ SimplePtrListCell* mask_column_info_cell = mask_column_info_list.head;
+ MaskColumnInfo* cur_mask_column_info = (MaskColumnInfo*) mask_column_info_cell->ptr;
+ while (mask_column_info_cell != NULL &&
+ (strcmp(cur_mask_column_info->column, current_column_name) ||
+ strcmp(cur_mask_column_info->table, tbinfo->dobj.name)))
+ {
+ if (!strcmp(cur_mask_column_info->table, "") &&
+ !strcmp(cur_mask_column_info->column, current_column_name))
+ break;
+
+ mask_column_info_cell = mask_column_info_cell->next;
+ if (mask_column_info_cell)
+ cur_mask_column_info = (MaskColumnInfo*) mask_column_info_cell->ptr;
+ }
+
+ if (mask_column_info_cell != NULL)
+ {
+ /*current table name is stored in tbinfo->dobj.name*/
+ if (!strcmp(cur_mask_column_info->table, "") ||
+ !strcmp(cur_mask_column_info->table, tbinfo->dobj.name))
+ masked_query = psprintf("%s.%s(%s)", cur_mask_column_info->schema,
+ cur_mask_column_info->func, current_column_name);
+ else
+ masked_query = psprintf("%s", current_column_name);
+ }
+ else
+ masked_query = psprintf("%s", current_column_name);
+
+ if (column_names)
+ simple_string_list_append(column_names, current_column_name);
+ current_column_name = strtok(NULL, " ,()");
+ if (current_column_name != NULL)
+ masked_query = psprintf("%s, ", masked_query);
+ appendPQExpBufferStr(*q, masked_query);
+ }
+ free(masked_query);
+}
+
/*
* getRootTableInfo:
* get the root TableInfo for the given partition table.
diff --git a/src/bin/pg_dump/t/030_pg_dump_masking.pl b/src/bin/pg_dump/t/030_pg_dump_masking.pl
new file mode 100644
index 0000000000..086ad272aa
--- /dev/null
+++ b/src/bin/pg_dump/t/030_pg_dump_masking.pl
@@ -0,0 +1,253 @@
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+my $tempdir = PostgreSQL::Test::Utils::tempdir;
+my $node = PostgreSQL::Test::Cluster->new('main');
+$node->init;
+$node->start;
+
+$node->safe_psql("postgres", "CREATE TABLE t0(id int, t text)");
+$node->safe_psql("postgres", "CREATE TABLE t1(id int, d timestamp)");
+$node->safe_psql("postgres", "CREATE TABLE t2(id int, r real)");
+$node->safe_psql("postgres", "CREATE TABLE t3(id int)");
+
+$node->safe_psql("postgres", "INSERT INTO t0 SELECT generate_series(1,3) AS id, md5(random()::text) AS t");
+$node->safe_psql("postgres", "INSERT INTO t1 SELECT generate_series(1,3) AS id,
+ NOW() + (random() * (interval '90 days')) + '30 days' AS d");
+$node->safe_psql("postgres", "INSERT INTO t2 SELECT generate_series(1,3) AS id, random() * 100 AS r");
+$node->safe_psql("postgres", "INSERT INTO t3 SELECT generate_series(1,3) AS id");
+
+$node->safe_psql("postgres", "CREATE SCHEMA test_schema");
+
+#masking functions
+
+my %functions = (
+ 'mask_int' => {
+ func_name => 'mask_int',
+ code => 'res := -1',
+ param_type => 'integer',
+ },
+ 'mask_int_with_schema' => {
+ func_name => 'test_schema.mask_int_with_schema',
+ code => 'res := -2',
+ param_type => 'integer',
+ },
+ 'mask_real' => {
+ func_name => 'mask_real',
+ code => 'res := -1.5',
+ param_type => 'real',
+ },
+ 'mask_text' => {
+ func_name => 'mask_text',
+ code => 'res := \'*****\'',
+ param_type => 'text',
+ },
+ 'mask_timestamp' => {
+ func_name => 'mask_timestamp',
+ code => 'res := \'1970-01-01 00:00:00\'',
+ param_type => 'timestamp',
+ },
+);
+
+foreach my $function (sort keys %functions)
+{
+ my $query = sprintf "CREATE OR REPLACE FUNCTION %s (IN elem %s, OUT res %s) RETURNS %s AS
+ \$BODY\$
+ BEGIN
+ %s;
+ RETURN;
+ END
+ \$BODY\$ LANGUAGE plpgsql;", $functions{$function}->{func_name}, $functions{$function}->{param_type},
+ $functions{$function}->{param_type}, $functions{$function}->{param_type}, $functions{$function}->{code};
+ $node->safe_psql("postgres", $query);
+}
+
+my %tests = (
+ 'test_mask_all_ids' => {
+ regexp => qr/^
+ \QCOPY public.t0 (id, t) FROM stdin;\E\n
+ (-1\s*\w*\n){3}
+ \Q\.\E\n
+ (.|\n)*
+ \QCOPY public.t1 (id, d) FROM stdin;\E\n
+ (-1\s*\d{4}-\d{2}-\d{2}\ \d{2}:\d{2}:\d{2}\.\d*\n){3}
+ \Q\.\E\n
+ (.|\n)*
+ \QCOPY public.t2 (id, r) FROM stdin;\E\n
+ (-1\s*\d*\.\d*\n){3}
+ \Q\.\E\n
+ (.|\n)*
+ \QCOPY public.t3 (id) FROM stdin;\E\n
+ (-1\s*\n){3}
+ \Q\.\E
+ /xm,
+ dump => [
+ 'pg_dump',
+ 'postgres',
+ '-f', "$tempdir/test_mask_all_ids.sql",
+ '--mask-columns', '"id"',
+ '--mask-function', 'mask_int']
+ },
+ 'test_mask_some_ids' => {
+ regexp => qr/^
+ \QCOPY public.t0 (id, t) FROM stdin;\E\n
+ (-1\s*\w*\n){3}
+ \Q\.\E\n
+ (.|\n)*
+ \QCOPY public.t1 (id, d) FROM stdin;\E\n
+ (-1\s*\d{4}-\d{2}-\d{2}\ \d{2}:\d{2}:\d{2}\.\d*\n){3}
+ \Q\.\E\n
+ (.|\n)*
+ \QCOPY public.t2 (id, r) FROM stdin;\E\n
+ 1\s*\d*\.\d*\n2\s*\d*\.\d*\n3\s*\d*\.\d*\n
+ \Q\.\E\n
+ (.|\n)*
+ \QCOPY public.t3 (id) FROM stdin;\E\n
+ 1\s*\n2\s*\n3\s*\n
+ \Q\.\E
+ /xm,
+ dump => [
+ 'pg_dump',
+ 'postgres',
+ '-f', "$tempdir/test_mask_some_ids.sql",
+ '--mask-columns', '"t0.id, t1.id"',
+ '--mask-function', 'mask_int']
+ },
+ 'test_mask_different_types' => {
+ regexp => qr/^
+ \QCOPY public.t0 (id, t) FROM stdin;\E\n
+ 1\s*\*{5}\n2\s*\*{5}\n3\s*\*{5}\n
+ \Q\.\E\n
+ (.|\n)*
+ \QCOPY public.t1 (id, d) FROM stdin;\E\n
+ 1\s*\Q1970-01-01 00:00:00\E\n2\s*\Q1970-01-01 00:00:00\E\n3\s*\Q1970-01-01 00:00:00\E\n
+ \Q\.\E\n
+ (.|\n)*
+ \QCOPY public.t2 (id, r) FROM stdin;\E\n
+ 1\s*\Q-1.5\E\n2\s*\Q-1.5\E\n3\s*\Q-1.5\E\n
+ \Q\.\E\n
+ (.|\n)*
+ \QCOPY public.t3 (id) FROM stdin;\E\n
+ 1\s*\n2\s*\n3\s*\n
+ \Q\.\E
+ /xm,
+ dump => [
+ 'pg_dump',
+ 'postgres',
+ '-f', "$tempdir/test_mask_different_types.sql",
+ '--mask-columns', 't',
+ '--mask-function', 'mask_text',
+ '--mask-columns', 'd',
+ '--mask-function', 'mask_timestamp',
+ '--mask-columns', 'r',
+ '--mask-function', 'mask_real']
+ },
+ 'test_mask_ids_with_schema' => {
+ regexp => qr/^
+ \QCOPY public.t0 (id, t) FROM stdin;\E\n
+ (-2\s*\w*\n){3}
+ \Q\.\E\n
+ (.|\n)*
+ \QCOPY public.t1 (id, d) FROM stdin;\E\n
+ (-2\s*\d{4}-\d{2}-\d{2}\ \d{2}:\d{2}:\d{2}\.\d*\n){3}
+ \Q\.\E\n
+ (.|\n)*
+ \QCOPY public.t2 (id, r) FROM stdin;\E\n
+ (-2\s*\d*\.\d*\n){3}
+ \Q\.\E\n
+ (.|\n)*
+ \QCOPY public.t3 (id) FROM stdin;\E\n
+ (-2\s*\n){3}
+ \Q\.\E
+ /xm,
+ dump => [
+ 'pg_dump',
+ 'postgres',
+ '-f', "$tempdir/test_mask_ids_with_schema.sql",
+ '--mask-columns', 'id',
+ '--mask-function', 'test_schema.mask_int_with_schema']
+ },
+ 'test_mask_ids_file' => {
+ regexp => qr/^
+ \QCOPY public.t0 (id, t) FROM stdin;\E\n
+ (-3\s*\w*\n){3}
+ \Q\.\E
+ /xm,
+ dump => [
+ 'pg_dump',
+ 'postgres',
+ '-f', "$tempdir/test_mask_ids_file.sql",
+ '-t', 't0',
+ '--mask-columns', 'id',
+ '--mask-function', "$tempdir/mask_ids.sql"]
+ },
+ 'test_mask_ids_insert' => {
+ regexp => qr/^
+ (\QINSERT INTO public.t0 (id, t) VALUES (-1, \E\'\w*\'\Q);\E\n){3}
+ /xm,
+ dump => [
+ 'pg_dump',
+ 'postgres',
+ '-f', "$tempdir/test_mask_ids_insert.sql",
+ '-t', 't0',
+ '--column-insert',
+ '--mask-columns', 'id',
+ '--mask-function', 'mask_int']
+ },
+);
+
+open my $fileHandle, ">", "$tempdir/mask_ids.sql";
+print $fileHandle "f_int\ninteger\nplpgsql\nres := -3;";
+close ($fileHandle);
+
+open $fileHandle, ">", "$tempdir/mask_drop_table.sql";
+print $fileHandle "f_int\ninteger\nplpgsql\nDROP TABLE t0;\nres := -3;";
+close ($fileHandle);
+
+open $fileHandle, ">", "$tempdir/mask_grant.sql";
+print $fileHandle "f_int\ninteger\nplpgsql\nres := -3;\nGRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO tester;";
+close ($fileHandle);
+
+foreach my $test (sort keys %tests)
+{
+ $node->command_ok(\@{ $tests{$test}->{dump} },"$test: pg_dump runs");
+
+ my $output_file = slurp_file("$tempdir/${test}.sql");
+
+ ok($output_file =~ $tests{$test}->{regexp}, "$test: should be dumped");
+}
+
+#security test - it shouldn't be possible to execute DROP TABLE during dump
+
+$node->command_fails_like(
+ ['pg_dump', 'postgres', '-f', "$tempdir/test_mask_ids_file.sql",
+ '-t', 't0', '--mask-columns', 'id', '--mask-function', "$tempdir/mask_drop_table.sql" ],
+ qr/\Qg_dump: error: Dumping the contents of table "t0" failed: PQgetResult() failed.
+pg_dump: detail: Error message from server: ERROR: cannot execute DROP TABLE in a read-only transaction
+CONTEXT: SQL statement "DROP TABLE t0"
+PL\/pgSQL function public.f_int(integer) line 3 at SQL statement
+pg_dump: detail: Command was: COPY (SELECT public.f_int(id), t FROM public.t0 ) TO stdout;\E/,
+ 'trying to drop table during dump');
+
+#security test - it shouldn't be possible to execute GRANT during dump
+
+$node->safe_psql("postgres", "CREATE USER tester");
+
+$node->command_fails_like(
+ ['pg_dump', 'postgres', '-f', "$tempdir/test_mask_ids_file.sql",
+ '-t', 't0', '--mask-columns', 'id', '--mask-function', "$tempdir/mask_grant.sql" ],
+ qr/\Qpg_dump: error: Dumping the contents of table "t0" failed: PQgetResult() failed.
+pg_dump: detail: Error message from server: ERROR: cannot execute GRANT in a read-only transaction
+CONTEXT: SQL statement "GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO tester"
+PL\/pgSQL function public.f_int(integer) line 4 at SQL statement
+pg_dump: detail: Command was: COPY (SELECT public.f_int(id), t FROM public.t0 ) TO stdout;\E/,
+ 'trying to drop table during dump');
+
+
+done_testing();
\ No newline at end of file