On Sun, Feb 6, 2011 at 15:31, Andrew Dunstan <and...@dunslane.net> wrote: > Force strings passed to and from plperl to be in UTF8 encoding. > > String are converted to UTF8 on the way into perl and to the > database encoding on the way back. This avoids a number of > observed anomalies, and ensures Perl a consistent view of the > world.
So I noticed a problem while playing with this in my discussion with David Wheeler. pg_do_encoding() does nothing when the src encoding == the dest encoding. That means on a UTF-8 database we fail make sure our strings are valid utf8. An easy way to see this is to embed a null in the middle of a string: => create or replace function zerob() returns text as $$ return "abcd\0efg"; $$ language plperl; => SELECT zerob(); abcd Also It seems bogus to bogus to do any encoding conversion when we are SQL_ASCII, and its really trivial to fix. With the attached: - when we are on a utf8 database make sure to verify our output string in sv2cstr (we assume database strings coming in are already valid) - Do no string conversion when we are SQL_ASCII in or out - add plperl_helpers.h as a dep to plperl.o in our makefile - remove some redundant calls to pg_verify_mbstr() - as utf_e2u only as one caller dont pstrdup() instead have the caller check (saves some cycles and memory)
*** a/doc/src/sgml/plperl.sgml --- b/doc/src/sgml/plperl.sgml *************** *** 127,135 **** $$ LANGUAGE plperl; <note> <para> ! Arguments will be converted from the database's encoding to UTF-8 ! for use inside plperl, and then converted from UTF-8 back to the ! database encoding upon return. </para> </note> --- 127,136 ---- <note> <para> ! Arguments will be converted from the database's encoding to ! UTF-8 for use inside plperl, and then converted from UTF-8 back ! to the database encoding upon return. Unless the database ! encoding is SQL_ASCII, then no conversion is done. </para> </note> *** a/src/pl/plperl/GNUmakefile --- b/src/pl/plperl/GNUmakefile *************** *** 54,60 **** PSQLDIR = $(bindir) include $(top_srcdir)/src/Makefile.shlib ! plperl.o: perlchunks.h plperl_opmask.h plperl_opmask.h: plperl_opmask.pl $(PERL) $< $@ --- 54,60 ---- include $(top_srcdir)/src/Makefile.shlib ! plperl.o: perlchunks.h plperl_opmask.h plperl_helpers.h plperl_opmask.h: plperl_opmask.pl $(PERL) $< $@ *** a/src/pl/plperl/plperl.c --- b/src/pl/plperl/plperl.c *************** *** 2308,2315 **** plperl_spi_exec(char *query, int limit) { int spi_rv; - pg_verifymbstr(query, strlen(query), false); - spi_rv = SPI_execute(query, current_call_data->prodesc->fn_readonly, limit); ret_hv = plperl_spi_execute_fetch_result(SPI_tuptable, SPI_processed, --- 2308,2313 ---- *************** *** 2555,2563 **** plperl_spi_query(char *query) void *plan; Portal portal; - /* Make sure the query is validly encoded */ - pg_verifymbstr(query, strlen(query), false); - /* Create a cursor for the query */ plan = SPI_prepare(query, 0, NULL); if (plan == NULL) --- 2553,2558 ---- *************** *** 2767,2775 **** plperl_spi_prepare(char *query, int argc, SV **argv) qdesc->argtypioparams[i] = typIOParam; } - /* Make sure the query is validly encoded */ - pg_verifymbstr(query, strlen(query), false); - /************************************************************ * Prepare the plan and check for errors ************************************************************/ --- 2762,2767 ---- *** a/src/pl/plperl/plperl_helpers.h --- b/src/pl/plperl/plperl_helpers.h *************** *** 20,27 **** static inline char * utf_e2u(const char *str) { char *ret = (char*)pg_do_encoding_conversion((unsigned char*)str, strlen(str), GetDatabaseEncoding(), PG_UTF8); - if (ret == str) - ret = pstrdup(ret); return ret; } --- 20,25 ---- *************** *** 34,46 **** sv2cstr(SV *sv) { char *val; STRLEN len; /* ! * get a utf8 encoded char * out of perl. *note* it may not be valid utf8! */ val = SvPVutf8(sv, len); /* * we use perls length in the event we had an embedded null byte to ensure * we error out properly */ --- 32,59 ---- { char *val; STRLEN len; + int enc = GetDatabaseEncoding(); /* ! * we dont do any conversion when SQL_ASCII */ + if (enc == PG_SQL_ASCII) + { + val = SvPV(sv, len); + return pstrdup(val); + } + val = SvPVutf8(sv, len); /* + * when the src encoding matches the dest encoding pg_do_encoding will not + * do any verification. SvPVutf8() may return invalid utf8 so we need to do + * that ourselves + */ + if (enc == PG_UTF8) + pg_verifymbstr(val, len, false); + + /* * we use perls length in the event we had an embedded null byte to ensure * we error out properly */ *************** *** 56,67 **** static inline SV * cstr2sv(const char *str) { SV *sv; ! char *utf8_str = utf_e2u(str); sv = newSVpv(utf8_str, 0); SvUTF8_on(sv); ! pfree(utf8_str); return sv; } --- 69,91 ---- cstr2sv(const char *str) { SV *sv; ! char *utf8_str; + /* + * we dont do any conversion when SQL_ASCII leave it as byte soup + */ + if (GetDatabaseEncoding() != PG_SQL_ASCII) + return newSVpv(str, 0); + + /* + * for all other encodings we convert it to utf8 + */ + utf8_str = utf_e2u(str); sv = newSVpv(utf8_str, 0); SvUTF8_on(sv); ! if (utf8_str != str) ! pfree(utf8_str); return sv; }
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers