Hello!

bytea-test.cxx -- contains detailed description of the bug and test
plan and code.

varlena.c.diff -- patch for PostgreSQL 7.5devel
src/backend/utils/adt/varlena.c#byteaout
fe-exec.c.diff -- patch (optional) for PostgreSQL 7.5devel
src/interfaces/libpq/fe-exec.c#PQescapeBytea

test.data -- contains test data :-))

-- 
Sergey N. Yatskevich <[EMAIL PROTECTED]>
GosNIIAS
/**
 * @file
 * @brief This test shows a bug in the PostgreSQL byteaout code
 * @author Sergey N. Yatskevich
 *
 * If the client and server encodings differ and the client and server locales are
 * not C (ASCII), then the path of a bytea string for client->server transfer is:
 *   -# encode binary data on the client with PQescapeBytea (encodes in \\ooo form all
 *      symbols with code == 0x0 || code >= 0x80)
 *   -# send encoded data to the server
 *   -# decode received characters with pg_client_to_server in
 *      src/backend/libpq/pqformat.c#pq_getmsgstring
 *   -# decode bytea C-string with byteain
 *   .
 * and path for server->client transfer is:
 *   -# encode bytea into C-string with byteaout (encode in \\ooo form all symbols with
 *      !isprint(symbol code) for current server locale)
 *   -# encode query result characters with pq_server_to_client in
 *      src/backend/libpq/pqformat.c#pq_sendcountedtext
 *   -# send encoded data to the client
 *   -# decode received data with PQunescapeBytea
 *
 * If we have the next client-server configuration:
 *
@verbatim
                     +---------------------+
                     | KOI8 (ru_RU.KOI8-R) |
                     |                     |
                     |       Server        |
                     +--+----------------+-+
                        ^                |
                        |                |
                        |                v
             +----------+----------+  +------------------+
             | KOI8 (ru_RU.KOI8-R) |  |   WIN (CP1251)   |
             |                     |  |                  |
             |   Client (Linux)    |  | Client (Windows) |
             +---------------------+  +------------------+
@endverbatim
 *
 * then, for example, the symbol RUSSIAN_A (code 225 in KOI8-R) sent from the Linux client
 * is stored in the database as the byte with code 225, and is then delivered to the
 * Windows client as the byte with code 192 (RUSSIAN_A in CP1251), which is wrong for the
 * bytea data type, whose bytes must never be transcoded!
 *
 * When the database has UNICODE encoding, some parts of bytea strings may not even be
 * sent to the client, because some byte sequences that are printable in the current
 * server locale do not form valid UTF-8 sequences and cannot be translated to the
 * client encoding properly.
 *
 * Simplest way to solve this problem is to replace isprint check in byteaout and
 * >= 0x80 check in PQescapeBytea procedures with isascii && isprint checks, because
 * ASCII symbols for all locales (and database encodings) have the same byte codes.
 *
 * Or you may do in byteaout the same symbol check as in PQescapeBytea (encode in \\ooo
 * form all symbols with code >= 0x80).
 *
 * But I prefer the first way (an isascii && isprint check in both byteaout and
 * PQescapeBytea), because it produces nice printable and editable ASCII dump output
 * for debugging :-)).
 *
 * Test steps:
 *    -# compile the program with the command (for gcc 3.2.3):
 *       <tt>g++ -Wall -pedantic -I`pg_config --includedir` bytea-test.cxx -o bytea-test -lpq</tt>
 *    -# init database cluster with non C(ASCII) locale (for example ru_RU.KOI8-R)
 *    -# create test database with non SQL_ASCII encoding (for example KOI8)
 *    -# create the test table in this database with the command:
 *       <tt>CREATE TABLE bytea_test (data BYTEA);</tt>
 *    -# run the test with the command: <tt>./bytea-test database_name test.data</tt>
 *    -# repeat the previous three steps with a UNICODE database
 *
 * Then apply patches (at least varlena.c.diff) and run test again. All must be done
 * successfully.
 *
 * @note @c bytea_test table must be available for deleting, inserting and selecting
 *
 * @note The attached test file (test.data) contains the sequence of all 256 8-bit symbols.
 *
 * @bug I am very sorry for my bad english, but I hope you understand me :-))
 */
#include <iostream>
#include <fstream>
#include <iterator>
#include <vector>

#include <libpq-fe.h>

using namespace std;

int
main (int _argc, char **_argv) {

        // Check arguments count
        if (_argc != 3) {
                cerr << "Usage: " << _argv[0] << " <database_name> <test_file>\n";
                return 1;
        }

        // Set up the database connection
        PGconn *conn = PQsetdb (NULL, NULL, NULL, NULL, _argv[1]);
        if (PQstatus (conn) == CONNECTION_BAD) {
                cerr << "Can't connect to database " << _argv[1] << " (" << 
PQerrorMessage (conn) << ")\n";
                PQfinish (conn);
                return 1;
        }

        // First client encoding
        PQsetClientEncoding (conn, "KOI8");
        if (PQstatus (conn) == CONNECTION_BAD) {
                cerr << "Can't set client encoding for database " << _argv[1] << " (" 
<< PQerrorMessage (conn) << ")\n";
                PQfinish (conn);
                return 1;
        }

        // Open binary file stream
        ifstream is (_argv[2], ios::binary);
        if (!is.good ()) {
                cerr << "Can't open test file (" << _argv[2] << ")\n";
                PQfinish (conn);
                return 1;
        }

        // Load binary file into memory
        vector<u_int8_t> bin;
        copy (istream_iterator<u_int8_t> (is), istream_iterator<u_int8_t> (), 
back_inserter (bin));

        // Clean test table
        PGresult *res = PQexec (conn, "DELETE FROM bytea_test");
        if (PQresultStatus (res) != PGRES_COMMAND_OK) {
                PQclear (res);
                cerr << "Can't create test table (" << PQresultErrorMessage (res) << 
")\n";
                PQfinish (conn);
                return 1;
        }
        PQclear (res);

        // Convert bin array into escaped string
        size_t escaped_bin_len = 0;
        unsigned char *escaped_bin = PQescapeBytea (&bin.front (), bin.size (), 
&escaped_bin_len);
        cout << "\nSend to server: " << escaped_bin << endl << flush;
        // Construct insert query
        string insert_q = string ("INSERT INTO bytea_test VALUES ('") +
                          string (reinterpret_cast<char *> (escaped_bin), 
escaped_bin_len - 1) +
                          string ("')");
        // Free no more need memory
        free (escaped_bin);

        // Insert bytea data into database
        res = PQexec (conn, insert_q.data ());
        if (PQresultStatus (res) != PGRES_COMMAND_OK) {
                PQclear (res);
                cerr << "Can't insert data into test table (" << PQresultErrorMessage 
(res) << ")\n";
                PQfinish (conn);
                return 1;
        }
        PQclear (res);

        // Second client encoding
        PQsetClientEncoding (conn, "WIN");
        if (PQstatus (conn) == CONNECTION_BAD) {
                cerr << "Can't set client encoding for database " << _argv[1] << " (" 
<< PQerrorMessage (conn) << ")\n";
                PQfinish (conn);
                return 1;
        }

        // Get back bin array from database
        res = PQexec (conn, "SELECT data FROM bytea_test");
        if ((PQresultStatus (res) != PGRES_TUPLES_OK) || (PQntuples (res) == 0)) {
                PQclear (res);
                cerr << "Can't get data from test table (" << PQresultErrorMessage 
(res) << ")\n";
                PQfinish (conn);
                return 1;
        }
        cout << "\nRecieve from server: " << PQgetvalue (res, 0, 0) << endl << flush;

        // Convert result into binary form
        size_t unescaped_bin_len = 0;
        unsigned char *unescaped_bin = PQunescapeBytea (reinterpret_cast<unsigned char 
*> (PQgetvalue (res, 0, 0)), &unescaped_bin_len);
        // Construct binary array
        vector<u_int8_t> bin2;
        copy (unescaped_bin, unescaped_bin + unescaped_bin_len, back_inserter (bin2));
        // Free no more need memory
        free (unescaped_bin);

        // Clear result
        PQclear (res);
        // Close connection
        PQfinish (conn);

        // Compare binary arrays size
        if (bin.size () != bin2.size ()) {
                cerr << "ERROR: Binary arrays have different size\n";
                return 1;
        }

        // Compare binary arrays data
        for (size_t i = 0; i < bin.size (); i++) {
                if (bin[i] != bin2[i]) {
                        cerr << "ERROR: Binary arrays have different content in [" << 
i << "] "
                             << (unsigned int)bin[i] << " != " << (unsigned 
int)bin2[i] << endl;
                        return 1;
                }
        }

        //////////////////////////////////////
        cout << "\nTest successfully done.\n";
        //////////////////////////////////////
        return 0;
}

Attachment: test.data
Description: Binary data

--- varlena.c	2003-09-26 02:54:52 +0400
+++ varlena.new.c	2003-11-18 18:44:58 +0300
@@ -186,7 +186,7 @@
 	{
 		if (*vp == '\\')
 			len += 2;
-		else if (isprint((unsigned char) *vp))
+		else if (isprint((unsigned char) *vp) && isascii((unsigned char) *vp))
 			len++;
 		else
 			len += 4;
@@ -200,7 +200,7 @@
 			*rp++ = '\\';
 			*rp++ = '\\';
 		}
-		else if (isprint((unsigned char) *vp))
+		else if (isprint((unsigned char) *vp) && isascii((unsigned char) *vp))
 			*rp++ = *vp;
 		else
 		{
--- fe-exec.c	2003-11-02 23:42:41 +0300
+++ fe-exec.new.c	2003-11-18 18:52:43 +0300
@@ -2258,10 +2258,9 @@
  *		INSERT statement with a bytea type column as the target.
  *
  *		The following transformations are applied
- *		'\0' == ASCII  0 == \\000
  *		'\'' == ASCII 39 == \'
  *		'\\' == ASCII 92 == \\\\
- *		anything >= 0x80 ---> \\ooo (where ooo is an octal expression)
+ *		anything !isprint || !isascii ---> \\ooo (where ooo is an octal expression)
  */
 unsigned char *
 PQescapeBytea(const unsigned char *bintext, size_t binlen, size_t *bytealen)
@@ -2280,7 +2279,7 @@
 	vp = bintext;
 	for (i = binlen; i > 0; i--, vp++)
 	{
-		if (*vp == 0 || *vp >= 0x80)
+		if (!isprint(*vp) || !isascii(*vp))
 			len += 5;			/* '5' is for '\\ooo' */
 		else if (*vp == '\'')
 			len += 2;
@@ -2299,7 +2298,7 @@
 
 	for (i = binlen; i > 0; i--, vp++)
 	{
-		if (*vp == 0 || *vp >= 0x80)
+		if (!isprint(*vp) || !isascii(*vp))
 		{
 			(void) sprintf(rp, "\\\\%03o", *vp);
 			rp += 5;
---------------------------(end of broadcast)---------------------------
TIP 3: if posting/reading through Usenet, please send an appropriate
      subscribe-nomail command to [EMAIL PROTECTED] so that your
      message can get through to the mailing list cleanly

Reply via email to