Here's a patch implementing custom parsers for data types mentioned in http://archives.postgresql.org/pgsql-hackers/2010-12/msg01991.php. It's an incremental patch on top of the plpython-refactor patch sent earlier.
Git branch for this patch: https://github.com/wulczer/postgres/tree/custom-parsers. The idea has been discussed in http://archives.postgresql.org/pgsql-hackers/2010-12/msg01307.php. With that patch, when built with --with-python, the hstore module includes code that adds a GUC called plpython.hstore. This GUC should be set to the full name of the hstore datatype, for instance plpython.hstore = 'public.hstore'. If it is set, the datatype's OID is looked up and hstore sets up a rendezvous variable called plpython_<OID>_parsers that points to two functions that can convert a hstore Datum to a PyObject and back. PL/Python, on the other hand, when it sees an argument with an unknown type, tries to look up a rendezvous variable using the type's OID, and if it finds it, it uses the parser functions pointed at by that variable. Long story short, it works like this: LOAD 'hstore'; SET plpython.hstore = 'public.hstore' CREATE FUNCTION pick_one(h hstore, key text) RETURNS hstore AS $$ return {key: h[key]} $$ LANGUAGE plpythonu; SELECT pick_one('a=>3,b=>4', 'b') -- gives back a hstore 'b=>4' There's some ugliness with how hstore's Makefile handles building it, and I'm not sure what's needed to make it work with the Windows build system. Also, documentation is missing. It's already usable, but if we decide to commit that, I'll probably need some help with Windows and docs. I first tried to make hstore generate a separate .so with that functionality if --with-python was specified, but couldn't convince the Makefile to do that. So if you configure the tree with --with-python, hstore will link to libpython, maybe that's OK? Cheers, Jan PS: of course, once committed we can add custom parsers for isbn, citext, uuids, cubes, and other weird things. J
diff --git a/contrib/hstore/Makefile b/contrib/hstore/Makefile index e466b6f..dbeeb89 100644 *** a/contrib/hstore/Makefile --- b/contrib/hstore/Makefile *************** top_builddir = ../.. *** 5,12 **** include $(top_builddir)/src/Makefile.global MODULE_big = hstore OBJS = hstore_io.o hstore_op.o hstore_gist.o hstore_gin.o hstore_compat.o \ ! crc32.o DATA_built = hstore.sql DATA = uninstall_hstore.sql --- 5,21 ---- include $(top_builddir)/src/Makefile.global MODULE_big = hstore + OBJS = hstore_io.o hstore_op.o hstore_gist.o hstore_gin.o hstore_compat.o \ ! hstore_plpython.o crc32.o ! ! ifeq ($(with_python),yes) ! ! PG_CPPFLAGS := -I$(srcdir) -I$(top_builddir)/src/pl/plpython \ ! $(python_includespec) -DHSTORE_PLPYTHON_SUPPORT ! SHLIB_LINK = $(python_libspec) $(python_additional_libs) \ ! $(filter -lintl,$(LIBS)) $(CPPFLAGS) ! endif DATA_built = hstore.sql DATA = uninstall_hstore.sql diff --git a/contrib/hstore/hstore.h b/contrib/hstore/hstore.h index 8906397..6edfc70 100644 *** a/contrib/hstore/hstore.h --- b/contrib/hstore/hstore.h *************** extern Pairs *hstoreArrayToPairs(ArrayTy *** 174,179 **** --- 174,182 ---- #define HStoreExistsAllStrategyNumber 11 #define HStoreOldContainsStrategyNumber 13 /* backwards compatibility */ + /* PL/Python support */ + extern void hstore_plpython_init(void); + /* * defining HSTORE_POLLUTE_NAMESPACE=0 will prevent use of old function names; * for now, we default to on for the benefit of people restoring old dumps diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c index 0d6f0b6..92c8db9 100644 *** a/contrib/hstore/hstore_io.c --- b/contrib/hstore/hstore_io.c *************** PG_MODULE_MAGIC; *** 20,25 **** --- 20,26 ---- /* old names for C functions */ HSTORE_POLLUTE(hstore_from_text, tconvert); + void _PG_init(void); typedef struct { *************** hstore_send(PG_FUNCTION_ARGS) *** 1211,1213 **** --- 1212,1220 ---- PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); } + + void + _PG_init(void) + { + 
hstore_plpython_init(); + } diff --git a/contrib/hstore/hstore_plpython.c b/contrib/hstore/hstore_plpython.c index ...081a33e . *** a/contrib/hstore/hstore_plpython.c --- b/contrib/hstore/hstore_plpython.c *************** *** 0 **** --- 1,249 ---- + /* + * contrib/src/hstore_plpython.c + * + * bidirectional transformation between hstores and Python dictionary objects + */ + + /* Only build if PL/Python support is needed */ + #if defined(HSTORE_PLPYTHON_SUPPORT) + + #if defined(_MSC_VER) && defined(_DEBUG) + /* Python uses #pragma to bring in a non-default libpython on VC++ if + * _DEBUG is defined */ + #undef _DEBUG + /* Also hide away errcode, since we load Python.h before postgres.h */ + #define errcode __msvc_errcode + #include <Python.h> + #undef errcode + #define _DEBUG + #elif defined (_MSC_VER) + #define errcode __msvc_errcode + #include <Python.h> + #undef errcode + #else + #include <Python.h> + #endif + + #include "postgres.h" + #include "utils/guc.h" + #include "utils/builtins.h" + #include "utils/syscache.h" + #include "catalog/namespace.h" + + #include "plpython.h" + #include "hstore.h" + + static Oid get_hstore_oid(const char *name); + static void set_hstore_parsers(Oid); + + static PyObject *hstore_to_dict(void *, Datum); + static Datum dict_to_hstore(void *, int32, PyObject *); + + /* GUC variables */ + + static char *hstore_name; + + /* Previous hstore OID */ + + static Oid previous; + + PLyParsers parsers = { + .in = hstore_to_dict, + .out = dict_to_hstore + }; + + static PyObject * + hstore_to_dict(void *ignored, Datum d) + { + HStore *hstore = DatumGetHStoreP(d); + char *base; + HEntry *entries; + int count; + int i; + PyObject *ret; + + base = STRPTR(hstore); + entries = ARRPTR(hstore); + + ret = PyDict_New(); + + count = HS_COUNT(hstore); + + for (i = 0; i < count; i++) + { + PyObject *key, *val; + + key = PyString_FromStringAndSize(HS_KEY(entries, base, i), + HS_KEYLEN(entries, i)); + if (HS_VALISNULL(entries, i)) { + Py_INCREF(Py_None); + val 
= Py_None; + } + else { + val = PyString_FromStringAndSize(HS_VAL(entries, base, i), + HS_VALLEN(entries, i)); + } + + PyDict_SetItem(ret, key, val); + } + + return ret; + } + + static Datum + dict_to_hstore(void *ignored, int32 typmod, PyObject *dict) + { + HStore *hstore; + int pcount; + Pairs *pairs; + PyObject *key; + PyObject *value; + Py_ssize_t pos; + char *keys; + char *vals; + int keylen; + int vallen; + int buflen; + int i; + + if (!PyDict_Check(dict)) + ereport(ERROR, + (errmsg("hstores can only be constructed " + "from Python dictionaries"))); + + pcount = PyDict_Size(dict); + pairs = palloc(pcount * sizeof(Pairs)); + pos = i = 0; + /* loop over the dictionary, creating a Pair for each key/value pair */ + while (PyDict_Next(dict, &pos, &key, &value)) { + if (!PyString_Check(key)) + elog(ERROR, "hstore keys have to be strings"); + + PyString_AsStringAndSize(key, &keys, &keylen); + + if (strlen(keys) != keylen) + elog(ERROR, "hstore keys cannot contain NUL bytes"); + + pairs[i].key = pstrdup(keys); + pairs[i].keylen = hstoreCheckKeyLen(keylen); + pairs[i].needfree = true; + + if (value == Py_None) { + pairs[i].val = NULL; + pairs[i].vallen = 0; + pairs[i].isnull = true; + } + else { + if (!PyString_Check(value)) + elog(ERROR, "hstore values have to be strings"); + + PyString_AsStringAndSize(value, &vals, &vallen); + + if (strlen(vals) != vallen) + elog(ERROR, "hstore values cannot contain NUL bytes"); + + pairs[i].val = pstrdup(vals); + pairs[i].vallen = hstoreCheckValLen(vallen); + pairs[i].isnull = false; + } + + i++; + } + pcount = hstoreUniquePairs(pairs, pcount, &buflen); + hstore = hstorePairs(pairs, pcount, buflen); + + return PointerGetDatum(hstore); + } + + static const char * + recheck_hstore_oid(const char *newvalue, bool doit, GucSource source) + { + Oid hstore_oid; + + if (newvalue == NULL) + return NULL; + + hstore_oid = get_hstore_oid(newvalue); + + if (*newvalue && !OidIsValid(hstore_oid)) + return NULL; + + if (doit) + 
set_hstore_parsers(hstore_oid); + + return newvalue; + } + + void + hstore_plpython_init(void) + { + DefineCustomStringVariable("plpython.hstore", + "The fully qualified name of the hstore type.", + NULL, + &hstore_name, + NULL, + PGC_SUSET, + 0, + recheck_hstore_oid, + NULL); + + EmitWarningsOnPlaceholders("plpython"); + + previous = InvalidOid; + + if (hstore_name && *hstore_name) + recheck_hstore_oid(hstore_name, true, PGC_S_FILE); + } + + static Oid + get_hstore_oid(const char *name) + { + text *text_name; + List *hstore_name; + char *type_name; + Oid type_namespace; + Oid typoid; + + Assert(name != NULL); + + if (!(*name)) + return InvalidOid; + + text_name = cstring_to_text(name); + hstore_name = textToQualifiedNameList(text_name); + pfree(text_name); + + type_namespace = QualifiedNameGetCreationNamespace(hstore_name, &type_name); + + typoid = GetSysCacheOid2(TYPENAMENSP, + CStringGetDatum(type_name), + ObjectIdGetDatum(type_namespace)); + + return typoid; + } + + static void + set_hstore_parsers(Oid hstore_oid) + { + char name[NAMEDATALEN]; + + if (OidIsValid(previous)) + { + snprintf(name, NAMEDATALEN, PARSERS_VARIABLE_PATTERN, previous); + *find_rendezvous_variable(name) = NULL; + } + + if (OidIsValid(hstore_oid)) + { + snprintf(name, NAMEDATALEN, PARSERS_VARIABLE_PATTERN, hstore_oid); + *find_rendezvous_variable(name) = &parsers; + previous = hstore_oid; + } + } + + #else /* !defined(HSTORE_PLPYTHON_SUPPORT) */ + + void + hstore_plpython_init(void) {}; + + #endif /* defined(HSTORE_PLPYTHON_SUPPORT) */ diff --git a/src/pl/plpython/plpython.c b/src/pl/plpython/plpython.c index 67eb0f3..a4d3528 100644 *** a/src/pl/plpython/plpython.c --- b/src/pl/plpython/plpython.c *************** typedef int Py_ssize_t; *** 90,95 **** --- 90,97 ---- #include <fcntl.h> /* postgreSQL stuff */ + #include "plpython.h" + #include "catalog/pg_proc.h" #include "catalog/pg_type.h" #include "commands/trigger.h" *************** static PyObject *PLyList_FromArray(PLyDa *** 347,352 
**** --- 349,357 ---- static PyObject *PLyDict_FromTuple(PLyTypeInfo *, HeapTuple, TupleDesc); + static PLyParserIn PLy_get_custom_input_function(Oid oid); + static PLyParserOut PLy_get_custom_output_function(Oid oid); + static Datum PLyObject_ToBool(PLyObToDatum *, int32, PyObject *); static Datum PLyObject_ToBytea(PLyObToDatum *, int32, PyObject *); static Datum PLyObject_ToDatum(PLyObToDatum *, int32, PyObject *); *************** PLy_output_datum_func2(PLyObToDatum *arg *** 1789,1794 **** --- 1794,1800 ---- { Form_pg_type typeStruct = (Form_pg_type) GETSTRUCT(typeTup); Oid element_type; + Oid argument_type; perm_fmgr_info(typeStruct->typinput, &arg->typfunc); arg->typoid = HeapTupleGetOid(typeTup); *************** PLy_output_datum_func2(PLyObToDatum *arg *** 1796,1807 **** arg->typbyval = typeStruct->typbyval; element_type = get_element_type(arg->typoid); /* * Select a conversion function to convert Python objects to PostgreSQL * datums. Most data types can go through the generic function. */ ! switch (getBaseType(element_type ? element_type : arg->typoid)) { case BOOLOID: arg->func = PLyObject_ToBool; --- 1802,1814 ---- arg->typbyval = typeStruct->typbyval; element_type = get_element_type(arg->typoid); + argument_type = getBaseType(element_type ? element_type : arg->typoid); /* * Select a conversion function to convert Python objects to PostgreSQL * datums. Most data types can go through the generic function. */ ! switch (argument_type) { case BOOLOID: arg->func = PLyObject_ToBool; *************** PLy_output_datum_func2(PLyObToDatum *arg *** 1810,1816 **** arg->func = PLyObject_ToBytea; break; default: ! arg->func = PLyObject_ToDatum; break; } --- 1817,1829 ---- arg->func = PLyObject_ToBytea; break; default: ! /* Last ditch effort of finding a rendezvous variable pointing to ! * a parser function, useful for extension modules plugging in ! * their own parsers ! */ ! arg->func = (PLyObToDatumFunc) PLy_get_custom_output_function(argument_type); ! 
if (arg->func == NULL) ! arg->func = PLyObject_ToDatum; break; } *************** PLy_input_datum_func2(PLyDatumToOb *arg, *** 1852,1857 **** --- 1865,1871 ---- { Form_pg_type typeStruct = (Form_pg_type) GETSTRUCT(typeTup); Oid element_type = get_element_type(typeOid); + Oid argument_type; /* Get the type's conversion information */ perm_fmgr_info(typeStruct->typoutput, &arg->typfunc); *************** PLy_input_datum_func2(PLyDatumToOb *arg, *** 1861,1868 **** arg->typlen = typeStruct->typlen; arg->typalign = typeStruct->typalign; /* Determine which kind of Python object we will convert to */ ! switch (getBaseType(element_type ? element_type : typeOid)) { case BOOLOID: arg->func = PLyBool_FromBool; --- 1875,1884 ---- arg->typlen = typeStruct->typlen; arg->typalign = typeStruct->typalign; + argument_type = getBaseType(element_type ? element_type : typeOid); + /* Determine which kind of Python object we will convert to */ ! switch (argument_type) { case BOOLOID: arg->func = PLyBool_FromBool; *************** PLy_input_datum_func2(PLyDatumToOb *arg, *** 1889,1895 **** arg->func = PLyBytes_FromBytea; break; default: ! arg->func = PLyString_FromDatum; break; } --- 1905,1917 ---- arg->func = PLyBytes_FromBytea; break; default: ! /* Last ditch effort of finding a rendezvous variable pointing to ! * a parser function, useful for extension modules plugging in ! * their own parsers ! */ ! arg->func = (PLyDatumToObFunc) PLy_get_custom_input_function(argument_type); ! if (arg->func == NULL) ! arg->func = PLyString_FromDatum; break; } *************** PLy_typeinfo_dealloc(PLyTypeInfo *arg) *** 1930,1935 **** --- 1952,1991 ---- } } + /* + * Getting the parser functions from a rendezvous variable set by another + * extension. 
+ */ + static PLyParserIn + PLy_get_custom_input_function(Oid oid) + { + PLyParsers *parsers; + char name[NAMEDATALEN]; + + snprintf(name, NAMEDATALEN, PARSERS_VARIABLE_PATTERN, oid); + parsers = *find_rendezvous_variable(name); + + if (parsers == NULL) + return NULL; + + return parsers->in; + } + + static PLyParserOut + PLy_get_custom_output_function(Oid oid) + { + PLyParsers *parsers; + char name[NAMEDATALEN]; + + snprintf(name, NAMEDATALEN, PARSERS_VARIABLE_PATTERN, oid); + parsers = *find_rendezvous_variable(name); + + if (parsers == NULL) + return NULL; + + return parsers->out; + } + static PyObject * PLyBool_FromBool(PLyDatumToOb *arg, Datum d) { diff --git a/src/pl/plpython/plpython.h b/src/pl/plpython/plpython.h index ...53d25b7 . *** a/src/pl/plpython/plpython.h --- b/src/pl/plpython/plpython.h *************** *** 0 **** --- 1,40 ---- + /* + * src/pl/plpython/plpython.h + */ + #ifndef __PLPYTHON_H__ + #define __PLPYTHON_H__ + + + + /* + * Rendezvous variable pattern for parsers exported from other extensions + * + * An extension providing parsres for type X should look up the type's OID and + * set a rendezvous variable using this pattern that points to a PLyParsers + * structure. PL/Python will then use these parsers for arguments with that + * OID. + */ + #define PARSERS_VARIABLE_PATTERN "plpython_%u_parsers" + + /* + * Types for parsres functions that other modules can export to transform + * Datums into PyObjects and back. The types need to be compatible with + * PLyObToDatumFunc and PLyDatumToObFunc, but we don't want to expose too much + * of plpython.c's guts here, so the first arguments is mandated to be a void + * pointer that should not be touched. An extension should know exactly what + * it's dealing with, so there's no need for it to look at anything contained + * in PLyTypeInfo, which is what gets passed here. + * + * The output parser also gets the type's typmod, which might actually be + * useful. 
+ */ + typedef PyObject *(*PLyParserIn) (void *, Datum); + typedef Datum (*PLyParserOut) (void *, int32, PyObject *); + + typedef struct PLyParsers + { + PLyParserIn in; + PLyParserOut out; + } PLyParsers; + + #endif /* __PLPYTHON_H__ */
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers