[PATCH] Fix non-ASCII cmdline options in non-UTF-8 locales

Julian Foad Thu, 25 Apr 2013 16:33:22 -0700

While reviewing r1475724 "Fix changelist filtering when --changelist values 
aren't UTF8", I noticed we also fail to convert some other command-line options 
to UTF-8.


The attached patch addresses:

  --old, --new: 
    Fix usage of non-ASCII paths. It looked for the wrong file and so was
    unusable.

  --search, --search-and:
    Fix usage of non-ASCII search terms. It looked for the wrong text and so
    was unusable.

  --limit, --accept, --show-revs, --strip, --change: 
    Fix the error message shown for an invalid value. It showed the invalid
    value using backslash-escape notation for the non-ASCII characters.

  --file:
    Fix the check for accidentally specifying a versioned file. It looked
    for the wrong file and so typically would not trigger when it should.

  --with-revprop:
    - '=' characters are examined before converting to UTF-8; this should be 
safe becaue '=' is in the ASCII Basic Code Table [1].
    - Property names are always converted to UTF-8; this is good.
    - Property values are not; this would be wrong for svn:* props, but svn:* 
props are not currently allowed to be set by this method, so OK.

Some other arguments, including '--native-eol', are not converted before 
testing for valid values, and the valid values are always ASCII.  This is OK, 
if we assume an ASCII subset.  That's probably safe, as [1] indicates that all 
common encodings except EBCDIC have an ASCII subset.  On the other hand, it 
does no harm to convert first, and doing so is more consistent with how we 
handle other options.

Any comments?

- Julian

[1] ASCII IRV and BCT 
<http://www.debian.org/doc/manuals/intro-i18n/ch-codes.en.html#s-ascii>.


--
Join WANdisco's free daily demo sessions on Scaling Subversion for the 
Enterprise
<http://www.wandisco.com/training/webinars>

Fix non-ASCII character handling in several 'svn' command-line options.

  --old, --new:
    Fix usage of non-ASCII paths. It looked for the wrong file and so was
    unusable.

  --search, --search-and:
    Fix usage of non-ASCII search terms. It looked for the wrong text and so
    was unusable.

  --limit, --accept, --show-revs, --strip, --change:
    Fix the error message shown for an invalid value. It showed the invalid
    value using backslash-escape notation for the non-ASCII characters.

  --file:
    Fix the check for accidentally specifying a versioned file. It looked
    for the wrong file and so typically would not trigger when it should.

* subversion/svn/svn.c
  (sub_main): Convert option arguments to UTF-8 before using them, in most
    cases where we weren't already doing so.
--This line, and those below, will be ignored--

Index: subversion/svn/svn.c
===================================================================
--- subversion/svn/svn.c	(revision 1475982)
+++ subversion/svn/svn.c	(working copy)
@@ -1795,13 +1795,14 @@ sub_main(int argc, const char *argv[], a
       /* Stash the option code in an array before parsing it. */
       APR_ARRAY_PUSH(received_opts, int) = opt_id;
 
       switch (opt_id) {
       case 'l':
         {
-          err = svn_cstring_atoi(&opt_state.limit, opt_arg);
+          SVN_INT_ERR(svn_utf_cstring_to_utf8(&utf8_opt_arg, opt_arg, pool));
+          err = svn_cstring_atoi(&opt_state.limit, utf8_opt_arg);
           if (err)
             {
               err = svn_error_create(SVN_ERR_CL_ARG_PARSING_ERROR, err,
                                      _("Non-numeric limit argument given"));
               return EXIT_ERROR(err);
             }
@@ -1811,22 +1812,24 @@ sub_main(int argc, const char *argv[], a
                                     _("Argument to --limit must be positive"));
               return EXIT_ERROR(err);
             }
         }
         break;
       case 'm':
-        /* Note that there's no way here to detect if the log message
-           contains a zero byte -- if it does, then opt_arg will just
-           be shorter than the user intended.  Oh well. */
+        /* We don't convert the log message to UTF-8 yet; ### Is that
+           because we might not have processed the '--encoding' option yet?
+           The log message callback converts it. */
         opt_state.message = apr_pstrdup(pool, opt_arg);
-        dash_m_arg = opt_arg;
+        dash_m_arg = opt_arg;  /* not converted to UTF-8 */
         break;
       case 'c':
         {
-          apr_array_header_t *change_revs =
-            svn_cstring_split(opt_arg, ", \n\r\t\v", TRUE, pool);
+          apr_array_header_t *change_revs;
+
+          SVN_INT_ERR(svn_utf_cstring_to_utf8(&utf8_opt_arg, opt_arg, pool));
+          change_revs = svn_cstring_split(utf8_opt_arg, ", \n\r\t\v", TRUE, pool);
 
           if (opt_state.old_target)
             {
               err = svn_error_create
                 (SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
                  _("Can't specify -c with --old"));
@@ -1915,16 +1918,16 @@ sub_main(int argc, const char *argv[], a
                                                        pool);
             }
         }
         break;
       case 'r':
         opt_state.used_revision_arg = TRUE;
+        SVN_INT_ERR(svn_utf_cstring_to_utf8(&utf8_opt_arg, opt_arg, pool));
         if (svn_opt_parse_revision_to_range(opt_state.revision_ranges,
-                                            opt_arg, pool) != 0)
+                                            utf8_opt_arg, pool) != 0)
           {
-            SVN_INT_ERR(svn_utf_cstring_to_utf8(&utf8_opt_arg, opt_arg, pool));
             err = svn_error_createf
                 (SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
                  _("Syntax error in revision argument '%s'"),
                  utf8_opt_arg);
             return EXIT_ERROR(err);
           }
@@ -1947,22 +1950,18 @@ sub_main(int argc, const char *argv[], a
         break;
       case 'F':
         SVN_INT_ERR(svn_utf_cstring_to_utf8(&utf8_opt_arg, opt_arg, pool));
         SVN_INT_ERR(svn_stringbuf_from_file2(&(opt_state.filedata),
                                              utf8_opt_arg, pool));
         reading_file_from_stdin = (strcmp(utf8_opt_arg, "-") == 0);
-        dash_F_arg = opt_arg;
+        dash_F_arg = utf8_opt_arg;
         break;
       case opt_targets:
         {
           svn_stringbuf_t *buffer, *buffer_utf8;
 
-          /* We need to convert to UTF-8 now, even before we divide
-             the targets into an array, because otherwise we wouldn't
-             know what delimiter to use for svn_cstring_split().  */
-
           SVN_INT_ERR(svn_utf_cstring_to_utf8(&utf8_opt_arg, opt_arg, pool));
           SVN_INT_ERR(svn_stringbuf_from_file2(&buffer, utf8_opt_arg, pool));
           SVN_INT_ERR(svn_utf_stringbuf_to_utf8(&buffer_utf8, buffer, pool));
           opt_state.targets = svn_cstring_split(buffer_utf8->data, "\n\r",
                                                 TRUE, pool);
         }
@@ -2106,47 +2105,46 @@ sub_main(int argc, const char *argv[], a
           {
             err = svn_error_create
               (SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
                _("Can't specify -c with --old"));
             return EXIT_ERROR(err);
           }
-        opt_state.old_target = apr_pstrdup(pool, opt_arg);
+        SVN_INT_ERR(svn_utf_cstring_to_utf8(&utf8_opt_arg, opt_arg, pool));
+        opt_state.old_target = apr_pstrdup(pool, utf8_opt_arg);
         break;
       case opt_new_cmd:
-        opt_state.new_target = apr_pstrdup(pool, opt_arg);
+        SVN_INT_ERR(svn_utf_cstring_to_utf8(&utf8_opt_arg, opt_arg, pool));
+        opt_state.new_target = apr_pstrdup(pool, utf8_opt_arg);
         break;
       case opt_config_dir:
-        {
-          const char *path_utf8;
-          SVN_INT_ERR(svn_utf_cstring_to_utf8(&path_utf8, opt_arg, pool));
-          opt_state.config_dir = svn_dirent_internal_style(path_utf8, pool);
-        }
+        SVN_INT_ERR(svn_utf_cstring_to_utf8(&utf8_opt_arg, opt_arg, pool));
+        opt_state.config_dir = svn_dirent_internal_style(utf8_opt_arg, pool);
         break;
       case opt_config_options:
         if (!opt_state.config_options)
           opt_state.config_options =
                    apr_array_make(pool, 1,
                                   sizeof(svn_cmdline__config_argument_t*));
 
-        SVN_INT_ERR(svn_utf_cstring_to_utf8(&opt_arg, opt_arg, pool));
+        SVN_INT_ERR(svn_utf_cstring_to_utf8(&utf8_opt_arg, opt_arg, pool));
         SVN_INT_ERR(svn_cmdline__parse_config_option(opt_state.config_options,
-                                                     opt_arg, pool));
+                                                     utf8_opt_arg, pool));
         break;
       case opt_autoprops:
         opt_state.autoprops = TRUE;
         break;
       case opt_no_autoprops:
         opt_state.no_autoprops = TRUE;
         break;
       case opt_native_eol:
-        if ( !strcmp("LF", opt_arg) || !strcmp("CR", opt_arg) ||
-             !strcmp("CRLF", opt_arg))
-          opt_state.native_eol = apr_pstrdup(pool, opt_arg);
+        SVN_INT_ERR(svn_utf_cstring_to_utf8(&utf8_opt_arg, opt_arg, pool));
+        if ( !strcmp("LF", utf8_opt_arg) || !strcmp("CR", utf8_opt_arg) ||
+             !strcmp("CRLF", utf8_opt_arg))
+          opt_state.native_eol = utf8_opt_arg;
         else
           {
-            SVN_INT_ERR(svn_utf_cstring_to_utf8(&utf8_opt_arg, opt_arg, pool));
             err = svn_error_createf
                 (SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
                  _("Syntax error in native-eol argument '%s'"),
                  utf8_opt_arg);
             return EXIT_ERROR(err);
           }
@@ -2193,37 +2191,41 @@ sub_main(int argc, const char *argv[], a
         opt_state.parents = TRUE;
         break;
       case 'g':
         opt_state.use_merge_history = TRUE;
         break;
       case opt_accept:
-        opt_state.accept_which = svn_cl__accept_from_word(opt_arg);
+        SVN_INT_ERR(svn_utf_cstring_to_utf8(&utf8_opt_arg, opt_arg, pool));
+        opt_state.accept_which = svn_cl__accept_from_word(utf8_opt_arg);
         if (opt_state.accept_which == svn_cl__accept_invalid)
           return EXIT_ERROR
             (svn_error_createf(SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
                                _("'%s' is not a valid --accept value"),
-                               opt_arg));
+                               utf8_opt_arg));
         break;
       case opt_show_revs:
-        opt_state.show_revs = svn_cl__show_revs_from_word(opt_arg);
+        SVN_INT_ERR(svn_utf_cstring_to_utf8(&utf8_opt_arg, opt_arg, pool));
+        opt_state.show_revs = svn_cl__show_revs_from_word(utf8_opt_arg);
         if (opt_state.show_revs == svn_cl__show_revs_invalid)
           return EXIT_ERROR
             (svn_error_createf(SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
                                _("'%s' is not a valid --show-revs value"),
-                               opt_arg));
+                               utf8_opt_arg));
         break;
       case opt_reintegrate:
         opt_state.reintegrate = TRUE;
         break;
       case opt_strip:
         {
-          err = svn_cstring_atoi(&opt_state.strip, opt_arg);
+          SVN_INT_ERR(svn_utf_cstring_to_utf8(&utf8_opt_arg, opt_arg, pool));
+          err = svn_cstring_atoi(&opt_state.strip, utf8_opt_arg);
           if (err)
             {
               err = svn_error_createf(SVN_ERR_CL_ARG_PARSING_ERROR, err,
-                                      _("Invalid strip count '%s'"), opt_arg);
+                                      _("Invalid strip count '%s'"),
+                                      utf8_opt_arg);
               return EXIT_ERROR(err);
             }
           if (opt_state.strip < 0)
             {
               err = svn_error_create(SVN_ERR_INCORRECT_PARAMS, NULL,
                                      _("Argument to --strip must be positive"));
@@ -2262,16 +2264,18 @@ sub_main(int argc, const char *argv[], a
         opt_state.show_inherited_props = TRUE;
         break;
       case opt_properties_only:
         opt_state.diff.properties_only = TRUE;
         break;
       case opt_search:
-        add_search_pattern_group(&opt_state, opt_arg, pool);
+        SVN_INT_ERR(svn_utf_cstring_to_utf8(&utf8_opt_arg, opt_arg, pool));
+        add_search_pattern_group(&opt_state, utf8_opt_arg, pool);
         break;
       case opt_search_and:
-        add_search_pattern_to_latest_group(&opt_state, opt_arg, pool);
+        SVN_INT_ERR(svn_utf_cstring_to_utf8(&utf8_opt_arg, opt_arg, pool));
+        add_search_pattern_to_latest_group(&opt_state, utf8_opt_arg, pool);
       default:
         /* Hmmm. Perhaps this would be a good place to squirrel away
            opts that commands like svn diff might need. Hmmm indeed. */
         break;
       }
     }

[PATCH] Fix non-ASCII cmdline options in non-UTF-8 locales

Reply via email to