> Author: kevans > Date: Sun Jun 7 04:32:38 2020 > New Revision: 361884 > URL: https://svnweb.freebsd.org/changeset/base/361884 > > Log: > sed: attempt to learn about hex escapes (e.g. \x27) > > Somewhat predictably, software often wants to use \x27/\x24 among others so > that they can decline worrying about ugly escaping, if said escaping is even > possible. Right now, this software is using these and getting the wrong > results, as we'll interpret those as x27 and x24 respectively. Some examples > of this, when an exp-run was run, were science/octopus and misc/vifm. > > Go ahead and process these at all times. We allow either one or two digits, > and the tests account for both. If extra digits are specified, e.g. \x2727, > then the third and fourth digits are interpreted literally as one might > expect.
Does it work to write \\x27? That is, I want a way to have sed NOT interpret \x27 as a hex escape, so that I can run sed on files that contain literal escape sequences. > > PR: 229925 > MFC after: 2 weeks > > Modified: > head/usr.bin/sed/compile.c > head/usr.bin/sed/tests/sed2_test.sh > > Modified: head/usr.bin/sed/compile.c > ============================================================================== > --- head/usr.bin/sed/compile.c Sun Jun 7 03:11:34 2020 > (r361883) > +++ head/usr.bin/sed/compile.c Sun Jun 7 04:32:38 2020 > (r361884) > @@ -49,6 +49,7 @@ static const char sccsid[] = "@(#)compile.c 8.1 (Berke > #include <fcntl.h> > #include <limits.h> > #include <regex.h> > +#include <stdbool.h> > #include <stdio.h> > #include <stdlib.h> > #include <string.h> > @@ -365,6 +366,51 @@ nonsel: /* Now parse the command */ > } > } > > +static int > +hex2char(const char *in, char *out, int len) > +{ > + long ord; > + char *endptr, hexbuf[3]; > + > + hexbuf[0] = in[0]; > + hexbuf[1] = len > 1 ? in[1] : '\0'; > + hexbuf[2] = '\0'; > + > + errno = 0; > + ord = strtol(hexbuf, &endptr, 16); > + if (*endptr != '\0' || errno != 0) > + return (ERANGE); > + *out = (char)ord; > + return (0); > +} > + > +static bool > +hexdigit(char c) > +{ > + int lc; > + > + lc = tolower(c); > + return isdigit(lc) || (lc >= 'a' && lc <= 'f'); > +} > + > +static bool > +dohex(const char *in, char *out, int *len) > +{ > + int tmplen; > + > + if (!hexdigit(in[0])) > + return (false); > + tmplen = 1; > + if (hexdigit(in[1])) > + ++tmplen; > + if (hex2char(in, out, tmplen) == 0) { > + *len = tmplen; > + return (true); > + } > + > + return (false); > +} > + > /* > * Get a delimited string. P points to the delimiter of the string; d points > * to a buffer area. 
Newline and delimiter escapes are processed; other > @@ -377,6 +423,7 @@ nonsel: /* Now parse the command */ > static char * > compile_delimited(char *p, char *d, int is_tr) > { > + int hexlen; > char c; > > c = *p++; > @@ -412,6 +459,12 @@ compile_delimited(char *p, char *d, int is_tr) > } > p += 2; > continue; > + } else if (*p == '\\' && p[1] == 'x') { > + if (dohex(&p[2], d, &hexlen)) { > + ++d; > + p += hexlen + 2; > + continue; > + } > } else if (*p == '\\' && p[1] == '\\') { > if (is_tr) > p++; > @@ -431,7 +484,7 @@ compile_delimited(char *p, char *d, int is_tr) > static char * > compile_ccl(char **sp, char *t) > { > - int c, d; > + int c, d, hexlen; > char *s = *sp; > > *t++ = *s++; > @@ -459,6 +512,10 @@ compile_ccl(char **sp, char *t) > *t = '\t'; > s++; > break; > + case 'x': > + if (dohex(&s[2], t, &hexlen)) > + s += hexlen + 1; > + break; > } > } > } > @@ -499,7 +556,7 @@ static char * > compile_subst(char *p, struct s_subst *s) > { > static char lbuf[_POSIX2_LINE_MAX + 1]; > - int asize, size; > + int asize, hexlen, size; > u_char ref; > char c, *text, *op, *sp; > int more = 1, sawesc = 0; > @@ -562,6 +619,21 @@ compile_subst(char *p, struct s_subst *s) > break; > case 't': > *p = '\t'; > + break; > + case 'x': > +#define ADVANCE_N(s, n) \ > + do { \ > + char *adv = (s); \ > + while (*(adv + (n) - 1) != '\0') { \ > + *adv = *(adv + (n)); \ > + ++adv; \ > + } \ > + *adv = '\0'; \ > + } while (0); > + if (dohex(&p[1], p, &hexlen)) { > + ADVANCE_N(p + 1, > + hexlen); > + } > break; > } > } > > Modified: head/usr.bin/sed/tests/sed2_test.sh > ============================================================================== > --- head/usr.bin/sed/tests/sed2_test.sh Sun Jun 7 03:11:34 2020 > (r361883) > +++ head/usr.bin/sed/tests/sed2_test.sh Sun Jun 7 04:32:38 2020 > (r361884) > @@ -88,10 +88,39 @@ escape_subst_body() > atf_check -o 'inline:abcx\n' sed 's/[ \r\t]//g' c > } > > +atf_test_case hex_subst > +hex_subst_head() > +{ > + atf_set "descr" "Verify proper 
conversion of hex escapes" > +} > +hex_subst_body() > +{ > + printf "test='foo'" > a > + printf "test='27foo'" > b > + printf "\rn" > c > + printf "xx" > d > + > + atf_check -o 'inline:test="foo"' sed 's/\x27/"/g' a > + atf_check -o "inline:'test'='foo'" sed 's/test/\x27test\x27/g' a > + > + # Make sure we take trailing digits literally. > + atf_check -o "inline:test=\"foo'" sed 's/\x2727/"/g' b > + > + # Single digit \x should work as well. > + atf_check -o "inline:xn" sed 's/\xd/x/' c > + > + # Invalid digit should cause us to ignore the sequence. This test > + # invokes UB, escapes of an ordinary character. A future change will > + # make regex(3) no longer tolerate this and we'll need to adjust what > + # we're doing, but for now this will suffice. > + atf_check -o "inline:" sed 's/\xx//' d > +} > + > atf_init_test_cases() > { > atf_add_test_case inplace_command_q > atf_add_test_case inplace_hardlink_src > atf_add_test_case inplace_symlink_src > atf_add_test_case escape_subst > + atf_add_test_case hex_subst > } > -- Rod Grimes rgri...@freebsd.org _______________________________________________ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"