Changeset: 095959816123 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=095959816123 Modified Files: sql/src/server/sql_scan.mx sql/src/test/BugTracker-2010/Tests/All sql/src/test/BugTracker-2010/Tests/unicode-bom.Bug-2641.sql sql/src/test/BugTracker-2010/Tests/unicode-bom.Bug-2641.stable.err sql/src/test/BugTracker-2010/Tests/unicode-bom.Bug-2641.stable.out Branch: Jun2010 Log Message:
Fix for Bug #2641. When we encounter a Unicode BOM, skip it. mclient does skip the BOM when it happens to be the first thing in a file, but not all clients are mclient after all. Hence, when e.g. a Java API application sent the BOM, it got an error. The implementation in the SQL server is more complete than what mclient does, though, as it also skips BOMs that appear in the middle of a file. I won't remove the BOM skipping from mclient, since it seems to be added there for MonetDB/XQuery, which for sure doesn't do this BOM skipping. diffs (182 lines): diff -r 7a184f337e33 -r 095959816123 sql/src/server/sql_scan.mx --- a/sql/src/server/sql_scan.mx Fri Aug 06 09:50:06 2010 +0200 +++ b/sql/src/server/sql_scan.mx Fri Aug 06 11:44:51 2010 +0200 @@ -569,6 +569,20 @@ /* incorrect UTF-8 sequence: not shortest possible */ goto error; } + + /* if we find a BOM interpret it as a "zero-width non-breaking + * space" by just skipping it */ + if (c == 0xFEFF) { + /* shift stuff so we won't "see" this BOM when it's in the + * middle of some word */ + memmove(b->buf + b->pos + 3, b->buf + b->pos, lc->yycur - 3); + for (n = 0; n < 3; n++) { + b->buf[b->pos++] = ' '; + lc->yycur--; + } + return(scanner_getc(lc)); + } + return c; error: diff -r 7a184f337e33 -r 095959816123 sql/src/test/BugTracker-2010/Tests/All --- a/sql/src/test/BugTracker-2010/Tests/All Fri Aug 06 09:50:06 2010 +0200 +++ b/sql/src/test/BugTracker-2010/Tests/All Fri Aug 06 11:44:51 2010 +0200 @@ -39,3 +39,4 @@ function_results_NULL.Bug-2626 with_row_number_crash.Bug-2631 complex_query_crash.Bug-2633 +unicode-bom.Bug-2641 diff -r 7a184f337e33 -r 095959816123 sql/src/test/BugTracker-2010/Tests/unicode-bom.Bug-2641.sql --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sql/src/test/BugTracker-2010/Tests/unicode-bom.Bug-2641.sql Fri Aug 06 11:44:51 2010 +0200 @@ -0,0 +1,10 @@ +-- The Unicode BOM (Byte Order Marker) can exist not only at the start +-- of the file (where mclient strips it for xquery), but anywhere. 
+-- When that happens the BOM should be ignored. + +-- the line above has nothing but the BOM, the line below starts with one +SELECT 1; +-- next line has the BOM in the middle of the SELECT +SELECT 1; +-- finally, more than one BOM scattered over the entire statement +SELECT 1; diff -r 7a184f337e33 -r 095959816123 sql/src/test/BugTracker-2010/Tests/unicode-bom.Bug-2641.stable.err --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sql/src/test/BugTracker-2010/Tests/unicode-bom.Bug-2641.stable.err Fri Aug 06 11:44:51 2010 +0200 @@ -0,0 +1,78 @@ +stderr of test 'unicode-bom.Bug-2641` in directory 'src/test/BugTracker-2010` itself: + + +# 11:21:04 > +# 11:21:04 > mserver5 "--config=/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/etc/monetdb5.conf" --debug=10 --set gdk_nr_threads=0 --set "monet_mod_path=/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5:/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5/lib:/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5/bin" --set "gdk_dbfarm=/net/volund.ins.cwi.nl/export/scratch0/fabian/vtmp/mtest-Jun2010-volund.ins.cwi.nl/sql/dbfarm" --set mapi_open=true --set xrpc_open=true --set mapi_port=33351 --set xrpc_port=40107 --set monet_prompt= --set mal_listing=2 --trace "--dbname=mTests_src_test_BugTracker-2010" --set mal_listing=0 ; echo ; echo Over.. 
+# 11:21:04 > + +# builtin opt gdk_arch = 64bitx86_64-pc-linux-gnu +# builtin opt gdk_version = 1.38.4 +# builtin opt prefix = /ufs/fabian/scratch/monetdb/Jun2010/program-x86_64 +# builtin opt exec_prefix = ${prefix} +# builtin opt gdk_dbname = demo +# builtin opt gdk_dbfarm = ${prefix}/var/MonetDB/dbfarm +# builtin opt gdk_debug = 0 +# builtin opt gdk_alloc_map = no +# builtin opt gdk_vmtrim = yes +# builtin opt monet_admin = adm +# builtin opt monet_prompt = > +# builtin opt monet_welcome = yes +# builtin opt monet_mod_path = /ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB +# builtin opt monet_daemon = no +# builtin opt host = localhost +# builtin opt mapi_port = 50000 +# builtin opt mapi_clients = 2 +# builtin opt mapi_open = false +# builtin opt mapi_autosense = false +# builtin opt sql_debug = 0 +# builtin opt standoff_ns = +# builtin opt standoff_start = start +# builtin opt standoff_end = end +# config opt prefix = /ufs/fabian/scratch/monetdb/Jun2010/program-x86_64 +# config opt config = ${prefix}/etc/monetdb5.conf +# config opt prefix = /ufs/fabian/scratch/monetdb/Jun2010/program-x86_64 +# config opt exec_prefix = ${prefix} +# config opt gdk_dbfarm = ${prefix}/var/MonetDB5/dbfarm +# config opt monet_mod_path = /ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5:/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5/lib:/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5/bin +# config opt mero_pidfile = ${prefix}/var/run/MonetDB/merovingian.pid +# config opt mero_controlport = 50001 +# config opt sql_optimizer = default_pipe +# config opt minimal_pipe = inline,remap,deadcode,multiplex,garbageCollector +# config opt default_pipe = inline,remap,evaluate,costModel,coercions,emptySet,aliases,mitosis,mergetable,deadcode,commonTerms,joinPath,reorder,deadcode,reduce,dataflow,history,multiplex,garbageCollector +# config opt nov2009_pipe = 
inline,remap,evaluate,costModel,coercions,emptySet,aliases,mergetable,deadcode,constants,commonTerms,joinPath,deadcode,reduce,dataflow,history,multiplex,garbageCollector +# config opt replication_pipe = inline,remap,evaluate,costModel,coercions,emptySet,aliases,mergetable,deadcode,constants,commonTerms,joinPath,deadcode,reduce,dataflow,history,replication,multiplex,garbageCollector +# config opt accumulator_pipe = inline,remap,evaluate,costModel,coercions,emptySet,aliases,mergetable,deadcode,constants,commonTerms,joinPath,deadcode,reduce,accumulators,dataflow,history,multiplex,garbageCollector +# config opt recycler_pipe = inline,remap,evaluate,costModel,coercions,emptySet,aliases,deadcode,constants,commonTerms,joinPath,deadcode,recycle,reduce,dataflow,history,multiplex,garbageCollector +# config opt cracker_pipe = inline,remap,evaluate,costModel,coercions,emptySet,aliases,selcrack,deadcode,constants,commonTerms,joinPath,deadcode,reduce,dataflow,history,multiplex,garbageCollector +# config opt sidcrack_pipe = inline,remap,evaluate,costModel,coercions,emptySet,aliases,sidcrack,deadcode,constants,commonTerms,joinPath,deadcode,reduce,dataflow,history,multiplex,garbageCollector +# config opt datacell_pipe = inline,remap,evaluate,costModel,coercions,emptySet,aliases,deadcode,constants,commonTerms,joinPath,datacell,deadcode,reduce,dataflow,history,multiplex,garbageCollector +# config opt octopus_pipe = inline,remap,evaluate,costModel,coercions,emptySet,aliases,mitosis,mergetable,deadcode,constants,commonTerms,joinPath,octopus,deadcode,reduce,dataflow,history,multiplex,garbageCollector +# config opt mapreduce_pipe = inline,remap,evaluate,costModel,coercions,emptySet,aliases,mapreduce,mergetable,deadcode,commonTerms,joinPath,reorder,deadcode,reduce,dataflow,history,multiplex,garbageCollector +# config opt datacyclotron_pipe = 
inline,remap,evaluate,costModel,coercions,emptySet,aliases,datacyclotron,mergetable,deadcode,constants,commonTerms,joinPath,reorder,deadcode,reduce,dataflow,history,replication,multiplex,garbageCollector +# config opt derive_pipe = inline,remap,evaluate,costModel,coercions,emptySet,aliases,mergetable,deadcode,constants,commonTerms,derivePath,joinPath,deadcode,reduce,dataflow,history,multiplex,garbageCollector +# config opt dictionary_pipe = inline,remap,dictionary,evaluate,costModel,coercions,emptySet,aliases,mergetable,deadcode,constants,commonTerms,joinPath,deadcode,reduce,dataflow,history,multiplex,garbageCollector +# config opt compression_pipe = inline,remap,evaluate,costModel,coercions,emptySet,aliases,mergetable,deadcode,constants,commonTerms,joinPath,deadcode,reduce,dataflow,compression,dataflow,history,multiplex,garbageCollector +# cmdline opt config = /ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/etc/monetdb5.conf +# cmdline opt gdk_nr_threads = 0 +# cmdline opt monet_mod_path = /ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5:/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5/lib:/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5/bin +# cmdline opt gdk_dbfarm = /net/volund.ins.cwi.nl/export/scratch0/fabian/vtmp/mtest-Jun2010-volund.ins.cwi.nl/sql/dbfarm +# cmdline opt mapi_open = true +# cmdline opt xrpc_open = true +# cmdline opt mapi_port = 33351 +# cmdline opt xrpc_port = 40107 +# cmdline opt monet_prompt = +# cmdline opt mal_listing = 2 +# cmdline opt gdk_dbname = mTests_src_test_BugTracker-2010 +# cmdline opt mal_listing = 0 +#warning: please don't forget to set your vault key! +#(see /ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/etc/monetdb5.conf) + +# 11:21:04 > +# 11:21:04 > mclient -lsql -ftest -i -e --host=volund --port=33351 +# 11:21:04 > + + +# 11:21:04 > +# 11:21:04 > Done. 
+# 11:21:04 > + diff -r 7a184f337e33 -r 095959816123 sql/src/test/BugTracker-2010/Tests/unicode-bom.Bug-2641.stable.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sql/src/test/BugTracker-2010/Tests/unicode-bom.Bug-2641.stable.out Fri Aug 06 11:44:51 2010 +0200 @@ -0,0 +1,50 @@ +stdout of test 'unicode-bom.Bug-2641` in directory 'src/test/BugTracker-2010` itself: + + +# 11:21:04 > +# 11:21:04 > mserver5 "--config=/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/etc/monetdb5.conf" --debug=10 --set gdk_nr_threads=0 --set "monet_mod_path=/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5:/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5/lib:/ufs/fabian/scratch/monetdb/Jun2010/program-x86_64/lib/MonetDB5/bin" --set "gdk_dbfarm=/net/volund.ins.cwi.nl/export/scratch0/fabian/vtmp/mtest-Jun2010-volund.ins.cwi.nl/sql/dbfarm" --set mapi_open=true --set xrpc_open=true --set mapi_port=33351 --set xrpc_port=40107 --set monet_prompt= --set mal_listing=2 --trace "--dbname=mTests_src_test_BugTracker-2010" --set mal_listing=0 ; echo ; echo Over.. +# 11:21:04 > + +# MonetDB server v5.20.4, based on kernel v1.38.4 +# Release Jun2010-hg +# Serving database 'mTests_src_test_BugTracker-2010', using 4 threads +# Compiled for x86_64-pc-linux-gnu/64bit with 64bit OIDs dynamically linked +# Found 7.751 GiB available main-memory. +# Copyright (c) 1993-July 2008 CWI. +# Copyright (c) August 2008-2010 MonetDB B.V., all rights reserved +# Visit http://monetdb.cwi.nl/ for further information +# Listening for connection requests on mapi:monetdb://volund.ins.cwi.nl:33351/ +# MonetDB/SQL module v2.38.4 loaded +# MonetDB/GIS module v0.18.0 loaded + +Ready. + +Over.. + +# 11:21:04 > +# 11:21:04 > mclient -lsql -ftest -i -e --host=volund --port=33351 +# 11:21:04 > + +#SELECT 1; +% . # table_name +% single_value # name +% tinyint # type +% 1 # length +[ 1 ] +#SELECT 1; +% . # table_name +% single_value # name +% tinyint # type +% 1 # length +[ 1 ] +#SELECT 1; +% . 
# table_name +% single_value # name +% tinyint # type +% 1 # length +[ 1 ] + +# 11:21:04 > +# 11:21:04 > Done. +# 11:21:04 > + _______________________________________________ Checkin-list mailing list Checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list