> Author: kevans
> Date: Sun Jun  7 04:32:38 2020
> New Revision: 361884
> URL: https://svnweb.freebsd.org/changeset/base/361884
> 
> Log:
>   sed: attempt to learn about hex escapes (e.g. \x27)
>   
>   Somewhat predictably, software often wants to use \x27/\x24 among others so
>   that they can decline worrying about ugly escaping, if said escaping is even
>   possible. Right now, this software is using these and getting the wrong
>   results, as we'll interpret those as x27 and x24 respectively. Some examples
>   of this, when an exp-run was run, were science/octopus and misc/vifm.
>   
>   Go ahead and process these at all times.  We allow either one or two digits,
>   and the tests account for both.  If extra digits are specified, e.g. \x2727,
>   then the third and fourth digits are interpreted literally as one might
>   expect.

Does it work to write \\x27?  That is, I want sed NOT to interpret \x27, so
that I can run sed on files that contain escape sequences.

>   
>   PR:         229925
>   MFC after:  2 weeks
> 
> Modified:
>   head/usr.bin/sed/compile.c
>   head/usr.bin/sed/tests/sed2_test.sh
> 
> Modified: head/usr.bin/sed/compile.c
> ==============================================================================
> --- head/usr.bin/sed/compile.c        Sun Jun  7 03:11:34 2020        
> (r361883)
> +++ head/usr.bin/sed/compile.c        Sun Jun  7 04:32:38 2020        
> (r361884)
> @@ -49,6 +49,7 @@ static const char sccsid[] = "@(#)compile.c 8.1 (Berke
>  #include <fcntl.h>
>  #include <limits.h>
>  #include <regex.h>
> +#include <stdbool.h>
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <string.h>
> @@ -365,6 +366,51 @@ nonsel:          /* Now parse the command */
>       }
>  }
>  
> +static int
> +hex2char(const char *in, char *out, int len)
> +{
> +     long ord;
> +     char *endptr, hexbuf[3];
> +
> +     hexbuf[0] = in[0];
> +     hexbuf[1] = len > 1 ? in[1] : '\0';
> +     hexbuf[2] = '\0';
> +
> +     errno = 0;
> +     ord = strtol(hexbuf, &endptr, 16);
> +     if (*endptr != '\0' || errno != 0)
> +             return (ERANGE);
> +     *out = (char)ord;
> +     return (0);
> +}
> +
> +static bool
> +hexdigit(char c)
> +{
> +     int lc;
> +
> +     lc = tolower(c);
> +     return isdigit(lc) || (lc >= 'a' && lc <= 'f');
> +}
> +
> +static bool
> +dohex(const char *in, char *out, int *len)
> +{
> +     int tmplen;
> +
> +     if (!hexdigit(in[0]))
> +             return (false);
> +     tmplen = 1;
> +     if (hexdigit(in[1]))
> +             ++tmplen;
> +     if (hex2char(in, out, tmplen) == 0) {
> +             *len = tmplen;
> +             return (true);
> +     }
> +
> +     return (false);
> +}
> +
>  /*
>   * Get a delimited string.  P points to the delimiter of the string; d points
>   * to a buffer area.  Newline and delimiter escapes are processed; other
> @@ -377,6 +423,7 @@ nonsel:           /* Now parse the command */
>  static char *
>  compile_delimited(char *p, char *d, int is_tr)
>  {
> +     int hexlen;
>       char c;
>  
>       c = *p++;
> @@ -412,6 +459,12 @@ compile_delimited(char *p, char *d, int is_tr)
>                       }
>                       p += 2;
>                       continue;
> +             } else if (*p == '\\' && p[1] == 'x') {
> +                     if (dohex(&p[2], d, &hexlen)) {
> +                             ++d;
> +                             p += hexlen + 2;
> +                             continue;
> +                     }
>               } else if (*p == '\\' && p[1] == '\\') {
>                       if (is_tr)
>                               p++;
> @@ -431,7 +484,7 @@ compile_delimited(char *p, char *d, int is_tr)
>  static char *
>  compile_ccl(char **sp, char *t)
>  {
> -     int c, d;
> +     int c, d, hexlen;
>       char *s = *sp;
>  
>       *t++ = *s++;
> @@ -459,6 +512,10 @@ compile_ccl(char **sp, char *t)
>                               *t = '\t';
>                               s++;
>                               break;
> +                     case 'x':
> +                             if (dohex(&s[2], t, &hexlen))
> +                                     s += hexlen + 1;
> +                             break;
>                       }
>               }
>       }
> @@ -499,7 +556,7 @@ static char *
>  compile_subst(char *p, struct s_subst *s)
>  {
>       static char lbuf[_POSIX2_LINE_MAX + 1];
> -     int asize, size;
> +     int asize, hexlen, size;
>       u_char ref;
>       char c, *text, *op, *sp;
>       int more = 1, sawesc = 0;
> @@ -562,6 +619,21 @@ compile_subst(char *p, struct s_subst *s)
>                                               break;
>                                       case 't':
>                                               *p = '\t';
> +                                             break;
> +                                     case 'x':
> +#define      ADVANCE_N(s, n)                                 \
> +     do {                                            \
> +             char *adv = (s);                        \
> +             while (*(adv + (n) - 1) != '\0') {      \
> +                     *adv = *(adv + (n));            \
> +                     ++adv;                          \
> +             }                                       \
> +             *adv = '\0';                            \
> +     } while (0);
> +                                             if (dohex(&p[1], p, &hexlen)) {
> +                                                     ADVANCE_N(p + 1,
> +                                                         hexlen);
> +                                             }
>                                               break;
>                                       }
>                               }
> 
> Modified: head/usr.bin/sed/tests/sed2_test.sh
> ==============================================================================
> --- head/usr.bin/sed/tests/sed2_test.sh       Sun Jun  7 03:11:34 2020        
> (r361883)
> +++ head/usr.bin/sed/tests/sed2_test.sh       Sun Jun  7 04:32:38 2020        
> (r361884)
> @@ -88,10 +88,39 @@ escape_subst_body()
>       atf_check -o 'inline:abcx\n' sed 's/[ \r\t]//g' c
>  }
>  
> +atf_test_case hex_subst
> +hex_subst_head()
> +{
> +     atf_set "descr" "Verify proper conversion of hex escapes"
> +}
> +hex_subst_body()
> +{
> +     printf "test='foo'" > a
> +     printf "test='27foo'" > b
> +     printf "\rn" > c
> +     printf "xx" > d
> +
> +     atf_check -o 'inline:test="foo"' sed 's/\x27/"/g' a
> +     atf_check -o "inline:'test'='foo'" sed 's/test/\x27test\x27/g' a
> +
> +     # Make sure we take trailing digits literally.
> +     atf_check -o "inline:test=\"foo'" sed 's/\x2727/"/g' b
> +
> +     # Single digit \x should work as well.
> +     atf_check -o "inline:xn" sed 's/\xd/x/' c
> +
> +     # Invalid digit should cause us to ignore the sequence.  This test
> +     # invokes UB, escapes of an ordinary character.  A future change will
> +     # make regex(3) no longer tolerate this and we'll need to adjust what
> +     # we're doing, but for now this will suffice.
> +     atf_check -o "inline:" sed 's/\xx//' d
> +}
> +
>  atf_init_test_cases()
>  {
>       atf_add_test_case inplace_command_q
>       atf_add_test_case inplace_hardlink_src
>       atf_add_test_case inplace_symlink_src
>       atf_add_test_case escape_subst
> +     atf_add_test_case hex_subst
>  }
> 

-- 
Rod Grimes                                                 rgri...@freebsd.org
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to