Changeset: 095959816123 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=095959816123
Modified Files:
        sql/src/server/sql_scan.mx
        sql/src/test/BugTracker-2010/Tests/All
        sql/src/test/BugTracker-2010/Tests/unicode-bom.Bug-2641.sql
        sql/src/test/BugTracker-2010/Tests/unicode-bom.Bug-2641.stable.err
        sql/src/test/BugTracker-2010/Tests/unicode-bom.Bug-2641.stable.out
Branch: Jun2010
Log Message:

Fix for Bug #2641

When we encounter a Unicode BOM, skip it.  mclient does skip the BOM
when it happens to be the first thing in a file, but not all clients are
mclient after all.  Hence, when e.g. a Java API application sent the BOM,
it got an error.  The implementation in the SQL server is more complete
than what mclient does, though, as it also skips BOMs that appear in the
middle of a file.

I won't remove the BOM skipping from mclient, since it seems to have been
added there for MonetDB/XQuery, which certainly doesn't do this BOM
skipping itself.


diffs (182 lines):

diff -r 7a184f337e33 -r 095959816123 sql/src/server/sql_scan.mx
--- a/sql/src/server/sql_scan.mx        Fri Aug 06 09:50:06 2010 +0200
+++ b/sql/src/server/sql_scan.mx        Fri Aug 06 11:44:51 2010 +0200
@@ -569,6 +569,20 @@
                /* incorrect UTF-8 sequence: not shortest possible */
                goto error;
        }
+
+       /* if we find a BOM interpret it as a "zero-width non-breaking
+        * space" by just skipping it */
+       if (c == 0xFEFF) {
+               /* shift stuff so we won't "see" this BOM when it's in the
+                * middle of some word */
+               memmove(b->buf + b->pos + 3, b->buf + b->pos, lc->yycur - 3);
+               for (n = 0; n < 3; n++) {
+                       b->buf[b->pos++] = ' ';
+                       lc->yycur--;
+               }
+               return(scanner_getc(lc));
+       }
+
        return c;
 
 error:
diff -r 7a184f337e33 -r 095959816123 sql/src/test/BugTracker-2010/Tests/All
--- a/sql/src/test/BugTracker-2010/Tests/All    Fri Aug 06 09:50:06 2010 +0200
+++ b/sql/src/test/BugTracker-2010/Tests/All    Fri Aug 06 11:44:51 2010 +0200
@@ -39,3 +39,4 @@
 function_results_NULL.Bug-2626
 with_row_number_crash.Bug-2631
 complex_query_crash.Bug-2633
+unicode-bom.Bug-2641
diff -r 7a184f337e33 -r 095959816123 
sql/src/test/BugTracker-2010/Tests/unicode-bom.Bug-2641.sql
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/sql/src/test/BugTracker-2010/Tests/unicode-bom.Bug-2641.sql       Fri Aug 
06 11:44:51 2010 +0200
@@ -0,0 +1,10 @@
+-- The Unicode BOM (Byte Order Marker) can exist not only at the start
+-- of the file (where mclient strips it for xquery), but anywhere.
+-- When that happens the BOM should be ignored.
+
+-- the line above has nothing but the BOM, the line below starts with one
+SELECT 1;
+-- next line has the BOM in the middle of the SELECT
+SELECT 1;
+-- finally, more than one BOM scattered over the entire statement
+SELECT 1;
diff -r 7a184f337e33 -r 095959816123 
sql/src/test/BugTracker-2010/Tests/unicode-bom.Bug-2641.stable.err
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/sql/src/test/BugTracker-2010/Tests/unicode-bom.Bug-2641.stable.err        
Fri Aug 06 11:44:51 2010 +0200
@@ -0,0 +1,78 @@
+stderr of test 'unicode-bom.Bug-2641` in directory 'src/test/BugTracker-2010` 
itself:
+
+
+# 11:21:04 >  
+# 11:21:04 >   mserver5 
"--config=/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/etc/monetdb5.conf" 
--debug=10 --set gdk_nr_threads=0 --set 
"monet_mod_path=/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5:/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5/lib:/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5/bin"
 --set 
"gdk_dbfarm=/net/volund.ins.cwi.nl/export/scratch0/fabian/vtmp/mtest-Jun2010-volund.ins.cwi.nl/sql/dbfarm"
  --set mapi_open=true --set xrpc_open=true --set mapi_port=33351 --set 
xrpc_port=40107 --set monet_prompt= --set mal_listing=2 --trace  
"--dbname=mTests_src_test_BugTracker-2010" --set mal_listing=0 ; echo ; echo 
Over..
+# 11:21:04 >  
+
+# builtin opt  gdk_arch = 64bitx86_64-pc-linux-gnu
+# builtin opt  gdk_version = 1.38.4
+# builtin opt  prefix = /ufs/fabian/scratch/monetdb/Jun2010/program-x86_64
+# builtin opt  exec_prefix = ${prefix}
+# builtin opt  gdk_dbname = demo
+# builtin opt  gdk_dbfarm = ${prefix}/var/MonetDB/dbfarm
+# builtin opt  gdk_debug = 0
+# builtin opt  gdk_alloc_map = no
+# builtin opt  gdk_vmtrim = yes
+# builtin opt  monet_admin = adm
+# builtin opt  monet_prompt = >
+# builtin opt  monet_welcome = yes
+# builtin opt  monet_mod_path = 
/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB
+# builtin opt  monet_daemon = no
+# builtin opt  host = localhost
+# builtin opt  mapi_port = 50000
+# builtin opt  mapi_clients = 2
+# builtin opt  mapi_open = false
+# builtin opt  mapi_autosense = false
+# builtin opt  sql_debug = 0
+# builtin opt  standoff_ns = 
+# builtin opt  standoff_start = start
+# builtin opt  standoff_end = end
+# config opt   prefix = /ufs/fabian/scratch/monetdb/Jun2010/program-x86_64
+# config opt   config = ${prefix}/etc/monetdb5.conf
+# config opt   prefix = /ufs/fabian/scratch/monetdb/Jun2010/program-x86_64
+# config opt   exec_prefix = ${prefix}
+# config opt   gdk_dbfarm = ${prefix}/var/MonetDB5/dbfarm
+# config opt   monet_mod_path = 
/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5:/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5/lib:/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5/bin
+# config opt   mero_pidfile = ${prefix}/var/run/MonetDB/merovingian.pid
+# config opt   mero_controlport = 50001
+# config opt   sql_optimizer = default_pipe
+# config opt   minimal_pipe = inline,remap,deadcode,multiplex,garbageCollector
+# config opt   default_pipe = 
inline,remap,evaluate,costModel,coercions,emptySet,aliases,mitosis,mergetable,deadcode,commonTerms,joinPath,reorder,deadcode,reduce,dataflow,history,multiplex,garbageCollector
+# config opt   nov2009_pipe = 
inline,remap,evaluate,costModel,coercions,emptySet,aliases,mergetable,deadcode,constants,commonTerms,joinPath,deadcode,reduce,dataflow,history,multiplex,garbageCollector
+# config opt   replication_pipe = 
inline,remap,evaluate,costModel,coercions,emptySet,aliases,mergetable,deadcode,constants,commonTerms,joinPath,deadcode,reduce,dataflow,history,replication,multiplex,garbageCollector
+# config opt   accumulator_pipe = 
inline,remap,evaluate,costModel,coercions,emptySet,aliases,mergetable,deadcode,constants,commonTerms,joinPath,deadcode,reduce,accumulators,dataflow,history,multiplex,garbageCollector
+# config opt   recycler_pipe = 
inline,remap,evaluate,costModel,coercions,emptySet,aliases,deadcode,constants,commonTerms,joinPath,deadcode,recycle,reduce,dataflow,history,multiplex,garbageCollector
+# config opt   cracker_pipe = 
inline,remap,evaluate,costModel,coercions,emptySet,aliases,selcrack,deadcode,constants,commonTerms,joinPath,deadcode,reduce,dataflow,history,multiplex,garbageCollector
+# config opt   sidcrack_pipe = 
inline,remap,evaluate,costModel,coercions,emptySet,aliases,sidcrack,deadcode,constants,commonTerms,joinPath,deadcode,reduce,dataflow,history,multiplex,garbageCollector
+# config opt   datacell_pipe = 
inline,remap,evaluate,costModel,coercions,emptySet,aliases,deadcode,constants,commonTerms,joinPath,datacell,deadcode,reduce,dataflow,history,multiplex,garbageCollector
+# config opt   octopus_pipe = 
inline,remap,evaluate,costModel,coercions,emptySet,aliases,mitosis,mergetable,deadcode,constants,commonTerms,joinPath,octopus,deadcode,reduce,dataflow,history,multiplex,garbageCollector
+# config opt   mapreduce_pipe = 
inline,remap,evaluate,costModel,coercions,emptySet,aliases,mapreduce,mergetable,deadcode,commonTerms,joinPath,reorder,deadcode,reduce,dataflow,history,multiplex,garbageCollector
+# config opt   datacyclotron_pipe = 
inline,remap,evaluate,costModel,coercions,emptySet,aliases,datacyclotron,mergetable,deadcode,constants,commonTerms,joinPath,reorder,deadcode,reduce,dataflow,history,replication,multiplex,garbageCollector
+# config opt   derive_pipe = 
inline,remap,evaluate,costModel,coercions,emptySet,aliases,mergetable,deadcode,constants,commonTerms,derivePath,joinPath,deadcode,reduce,dataflow,history,multiplex,garbageCollector
+# config opt   dictionary_pipe = 
inline,remap,dictionary,evaluate,costModel,coercions,emptySet,aliases,mergetable,deadcode,constants,commonTerms,joinPath,deadcode,reduce,dataflow,history,multiplex,garbageCollector
+# config opt   compression_pipe = 
inline,remap,evaluate,costModel,coercions,emptySet,aliases,mergetable,deadcode,constants,commonTerms,joinPath,deadcode,reduce,dataflow,compression,dataflow,history,multiplex,garbageCollector
+# cmdline opt  config = 
/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/etc/monetdb5.conf
+# cmdline opt  gdk_nr_threads = 0
+# cmdline opt  monet_mod_path = 
/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5:/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5/lib:/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5/bin
+# cmdline opt  gdk_dbfarm = 
/net/volund.ins.cwi.nl/export/scratch0/fabian/vtmp/mtest-Jun2010-volund.ins.cwi.nl/sql/dbfarm
+# cmdline opt  mapi_open = true
+# cmdline opt  xrpc_open = true
+# cmdline opt  mapi_port = 33351
+# cmdline opt  xrpc_port = 40107
+# cmdline opt  monet_prompt = 
+# cmdline opt  mal_listing = 2
+# cmdline opt  gdk_dbname = mTests_src_test_BugTracker-2010
+# cmdline opt  mal_listing = 0
+#warning: please don't forget to set your vault key!
+#(see /ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/etc/monetdb5.conf)
+
+# 11:21:04 >  
+# 11:21:04 >  mclient -lsql -ftest -i -e --host=volund --port=33351 
+# 11:21:04 >  
+
+
+# 11:21:04 >  
+# 11:21:04 >  Done.
+# 11:21:04 >  
+
diff -r 7a184f337e33 -r 095959816123 
sql/src/test/BugTracker-2010/Tests/unicode-bom.Bug-2641.stable.out
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/sql/src/test/BugTracker-2010/Tests/unicode-bom.Bug-2641.stable.out        
Fri Aug 06 11:44:51 2010 +0200
@@ -0,0 +1,50 @@
+stdout of test 'unicode-bom.Bug-2641` in directory 'src/test/BugTracker-2010` 
itself:
+
+
+# 11:21:04 >  
+# 11:21:04 >   mserver5 
"--config=/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/etc/monetdb5.conf" 
--debug=10 --set gdk_nr_threads=0 --set 
"monet_mod_path=/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5:/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5/lib:/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5/bin"
 --set 
"gdk_dbfarm=/net/volund.ins.cwi.nl/export/scratch0/fabian/vtmp/mtest-Jun2010-volund.ins.cwi.nl/sql/dbfarm"
  --set mapi_open=true --set xrpc_open=true --set mapi_port=33351 --set 
xrpc_port=40107 --set monet_prompt= --set mal_listing=2 --trace  
"--dbname=mTests_src_test_BugTracker-2010" --set mal_listing=0 ; echo ; echo 
Over..
+# 11:21:04 >  
+
+# MonetDB server v5.20.4, based on kernel v1.38.4
+# Release Jun2010-hg
+# Serving database 'mTests_src_test_BugTracker-2010', using 4 threads
+# Compiled for x86_64-pc-linux-gnu/64bit with 64bit OIDs dynamically linked
+# Found 7.751 GiB available main-memory.
+# Copyright (c) 1993-July 2008 CWI.
+# Copyright (c) August 2008-2010 MonetDB B.V., all rights reserved
+# Visit http://monetdb.cwi.nl/ for further information
+# Listening for connection requests on mapi:monetdb://volund.ins.cwi.nl:33351/
+# MonetDB/SQL module v2.38.4 loaded
+# MonetDB/GIS module v0.18.0 loaded
+
+Ready.
+
+Over..
+
+# 11:21:04 >  
+# 11:21:04 >  mclient -lsql -ftest -i -e --host=volund --port=33351 
+# 11:21:04 >  
+
+#SELECT 1;
+% . # table_name
+% single_value # name
+% tinyint # type
+% 1 # length
+[ 1    ]
+#SELECT 1;
+% . # table_name
+% single_value # name
+% tinyint # type
+% 1 # length
+[ 1    ]
+#SELECT 1;
+% . # table_name
+% single_value # name
+% tinyint # type
+% 1 # length
+[ 1    ]
+
+# 11:21:04 >  
+# 11:21:04 >  Done.
+# 11:21:04 >  
+
_______________________________________________
Checkin-list mailing list
Checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to