Re: [PATCH] better UTF-8 support for buffer_sample

Nicholas Marriott Sun, 09 Mar 2014 12:04:57 -0700

On Mon, Mar 10, 2014 at 12:47:46AM +0900, Kosuke ASAMI wrote:
> The first zero check of 'len' seems to be unnecessary.
>


Yes you are right, thanks. Will look at this again and apply it later
this week.

> I have fixed the amount by which 'src' is decremented when a UTF-8
> character is not completed.
> 
> 2014-03-09 18:16 GMT+09:00 Nicholas Marriott <[email protected]>:
> > Actually I think this function is wrong because it reads over the end of
> > src. len is the length of src, not dst. We know there is always enough
> > space in dst.
> >
> > I haven't time now to look at this closely or test it but maybe
> > something like:
> >
> > diff --git a/utf8.c b/utf8.c
> > index 63723d7..b1f44bf 100644
> > --- a/utf8.c
> > +++ b/utf8.c
> > @@ -352,3 +352,51 @@ utf8_width(const struct utf8_data *utf8data)
> >         }
> >         return (1);
> >  }
> > +
> > +/*
> > + * Encode len characters from src into dst, which is guaranteed to have 
> > four
> > + * bytes available for each character from src (for \abc or UTF-8) plus 
> > space
> > + * for \0.
> > + */
> > +int
> > +utf8_strvis(char *dst, const char *src, size_t len, int flag)
> > +{
> > +       struct utf8_data         utf8data;
> > +       const char              *start, *end;
> > +       int                      more;
> > +       size_t                   i;
> > +
> > +       if (len == 0) {
> > +               *dst = '\0';
> > +               return (0);
> > +       }
> > +
> > +       start = dst;
> > +       end = src + len;
> > +
> > +       while (src < end) {
> > +               if (utf8_open(&utf8data, *src)) {
> > +                       more = 1;
> > +                       while (++src < end && more)
> > +                               more = utf8_append(&utf8data, *src);
> > +                       if (!more) {
> > +                               /* UTF-8 character finished. */
> > +                               for (i = 0; i < utf8data.size; i++)
> > +                                       *dst++ = utf8data.data[i];
> > +                               continue;
> > +                       } else if (utf8data.have > 0) {
> > +                               /* Not a complete UTF-8 character. */
> > +                               src -= (utf8data.have - 1);
> > +                       }
> > +               }
> > +               if (src < end - 1)
> > +                       dst = vis(dst, src[0], flag, src[1]);
> > +               else if (src < end)
> > +                       dst = vis(dst, src[0], flag, '\0');
> > +               src++;
> > +
> > +       }
> > +
> > +       *dst = '\0';
> > +       return (dst - start);
> > +}
> >
> >
> >
> > On Sun, Mar 09, 2014 at 10:30:39AM +0900, Kosuke ASAMI wrote:
> >> I have tested, and it works fine!
> >>
> >> However, there is a mistake in my first patch.
> >> The flag needs to be cleared before the next iteration (as in the following diff).
> >>
> >> I have attached the complete patch.
> >>
> >> ----------
> >>
> >> diff --git a/utf8.c b/utf8.c
> >> index 10200dd..0465a32 100644
> >> --- a/utf8.c
> >> +++ b/utf8.c
> >> @@ -379,6 +379,7 @@ utf8_strvis(char *dst, const char *src, size_t
> >> len, int flag)
> >>   src++;
> >>   for (i = 0; i < utf8data.size; i++)
> >>   *dst++ = utf8data.data[i];
> >> + more = -1;
> >>   continue;
> >>   } else if (utf8data.have > 0) {
> >>   /* Not a complete UTF-8 character. */
> >>
> >> 2014-03-09 1:51 GMT+09:00 Nicholas Marriott <[email protected]>:
> >> > Hi
> >> >
> >> > Here it is with a little tidying up. I didn't test this, but can you take
> >> > a look?
> >> >
> >> >
> >> > diff --git a/cmd-choose-buffer.c b/cmd-choose-buffer.c
> >> > index d79f6fd..a46db3e 100644
> >> > --- a/cmd-choose-buffer.c
> >> > +++ b/cmd-choose-buffer.c
> >> > @@ -45,6 +45,7 @@ cmd_choose_buffer_exec(struct cmd *self, struct cmd_q 
> >> > *cmdq)
> >> >         struct client                   *c;
> >> >         struct window_choose_data       *cdata;
> >> >         struct winlink                  *wl;
> >> > +       struct window_pane              *wp;
> >> >         struct paste_buffer             *pb;
> >> >         char                            *action, *action_data;
> >> >         const char                      *template;
> >> > @@ -61,6 +62,9 @@ cmd_choose_buffer_exec(struct cmd *self, struct cmd_q 
> >> > *cmdq)
> >> >         if ((wl = cmd_find_window(cmdq, args_get(args, 't'), NULL)) == 
> >> > NULL)
> >> >                 return (CMD_RETURN_ERROR);
> >> >
> >> > +       if (cmd_find_pane(cmdq, args_get(args, 't'), NULL, &wp) == NULL)
> >> > +               return (CMD_RETURN_ERROR);
> >> > +
> >> >         if (paste_get_top(&global_buffers) == NULL)
> >> >                 return (CMD_RETURN_NORMAL);
> >> >
> >> > @@ -79,7 +83,7 @@ cmd_choose_buffer_exec(struct cmd *self, struct cmd_q 
> >> > *cmdq)
> >> >
> >> >                 cdata->ft_template = xstrdup(template);
> >> >                 format_add(cdata->ft, "line", "%u", idx - 1);
> >> > -               format_paste_buffer(cdata->ft, pb);
> >> > +               format_paste_buffer(cdata->ft, wp, pb);
> >> >
> >> >                 xasprintf(&action_data, "%u", idx - 1);
> >> >                 cdata->command = cmd_template_replace(action, 
> >> > action_data, 1);
> >> > diff --git a/cmd-list-buffers.c b/cmd-list-buffers.c
> >> > index 02a4183..6a9fb5e 100644
> >> > --- a/cmd-list-buffers.c
> >> > +++ b/cmd-list-buffers.c
> >> > @@ -42,12 +42,16 @@ enum cmd_retval
> >> >  cmd_list_buffers_exec(unused struct cmd *self, struct cmd_q *cmdq)
> >> >  {
> >> >         struct args             *args = self->args;
> >> > +       struct window_pane      *wp;
> >> >         struct paste_buffer     *pb;
> >> >         struct format_tree      *ft;
> >> >         u_int                    idx;
> >> >         char                    *line;
> >> >         const char              *template;
> >> >
> >> > +       if (cmd_find_pane(cmdq, args_get(args, 't'), NULL, &wp) == NULL)
> >> > +               return (CMD_RETURN_ERROR);
> >> > +
> >> >         if ((template = args_get(args, 'F')) == NULL)
> >> >                 template = LIST_BUFFERS_TEMPLATE;
> >> >
> >> > @@ -55,7 +59,7 @@ cmd_list_buffers_exec(unused struct cmd *self, struct 
> >> > cmd_q *cmdq)
> >> >         while ((pb = paste_walk_stack(&global_buffers, &idx)) != NULL) {
> >> >                 ft = format_create();
> >> >                 format_add(ft, "line", "%u", idx - 1);
> >> > -               format_paste_buffer(ft, pb);
> >> > +               format_paste_buffer(ft, wp, pb);
> >> >
> >> >                 line = format_expand(ft, template);
> >> >                 cmdq_print(cmdq, "%s", line);
> >> > diff --git a/format.c b/format.c
> >> > index 497b5b5..05cd4ed 100644
> >> > --- a/format.c
> >> > +++ b/format.c
> >> > @@ -603,9 +603,10 @@ format_window_pane(struct format_tree *ft, struct 
> >> > window_pane *wp)
> >> >
> >> >  /* Set default format keys for paste buffer. */
> >> >  void
> >> > -format_paste_buffer(struct format_tree *ft, struct paste_buffer *pb)
> >> > +format_paste_buffer(struct format_tree *ft, struct window_pane *wp,
> >> > +    struct paste_buffer *pb)
> >> >  {
> >> > -       char    *pb_print = paste_print(pb, 50);
> >> > +       char    *pb_print = paste_print(pb, wp, 50);
> >> >
> >> >         format_add(ft, "buffer_size", "%zu", pb->size);
> >> >         format_add(ft, "buffer_sample", "%s", pb_print);
> >> > diff --git a/paste.c b/paste.c
> >> > index 28f1230..6101195 100644
> >> > --- a/paste.c
> >> > +++ b/paste.c
> >> > @@ -149,23 +149,26 @@ paste_replace(struct paste_stack *ps, u_int idx, 
> >> > char *data, size_t size)
> >> >
> >> >  /* Convert a buffer into a visible string. */
> >> >  char *
> >> > -paste_print(struct paste_buffer *pb, size_t width)
> >> > +paste_print(struct paste_buffer *pb, struct window_pane *wp, size_t 
> >> > width)
> >> >  {
> >> > -       char    *buf;
> >> > -       size_t   len, used;
> >> > +       char            *buf;
> >> > +       size_t           len, used;
> >> > +       const int        flags = VIS_OCTAL|VIS_TAB|VIS_NL;
> >> >
> >> >         if (width < 3)
> >> >                 width = 3;
> >> > -       buf = xmalloc(width * 4 + 1);
> >> > +       buf = xmalloc(width * 4 + 4); /* four bytes each plus "...\0" */
> >> >
> >> >         len = pb->size;
> >> >         if (len > width)
> >> >                 len = width;
> >> >
> >> > -       used = strvisx(buf, pb->data, len, VIS_OCTAL|VIS_TAB|VIS_NL);
> >> > +       if (options_get_number(&wp->window->options, "utf8"))
> >> > +               used = utf8_strvis(buf, pb->data, len, flags);
> >> > +       else
> >> > +               used = strvisx(buf, pb->data, len, flags);
> >> >         if (pb->size > width || used > width)
> >> > -               strlcpy(buf + width - 3, "...", 4);
> >> > -
> >> > +               strlcpy(buf + width, "...", 4);
> >> >         return (buf);
> >> >  }
> >> >
> >> > diff --git a/tmux.h b/tmux.h
> >> > index 5aac390..195c955 100644
> >> > --- a/tmux.h
> >> > +++ b/tmux.h
> >> > @@ -1540,7 +1540,7 @@ void               format_winlink(struct 
> >> > format_tree *, struct session *,
> >> >                      struct winlink *);
> >> >  void            format_window_pane(struct format_tree *,
> >> >                      struct window_pane *);
> >> > -void            format_paste_buffer(struct format_tree *,
> >> > +void            format_paste_buffer(struct format_tree *, struct 
> >> > window_pane *,
> >> >                      struct paste_buffer *);
> >> >
> >> >  /* mode-key.c */
> >> > @@ -1711,7 +1711,8 @@ int                paste_free_top(struct 
> >> > paste_stack *);
> >> >  int             paste_free_index(struct paste_stack *, u_int);
> >> >  void            paste_add(struct paste_stack *, char *, size_t, u_int);
> >> >  int             paste_replace(struct paste_stack *, u_int, char *, 
> >> > size_t);
> >> > -char           *paste_print(struct paste_buffer *, size_t);
> >> > +char           *paste_print(struct paste_buffer *, struct window_pane *,
> >> > +                    size_t);
> >> >  void            paste_send_pane(struct paste_buffer *, struct 
> >> > window_pane *,
> >> >                      const char *, int);
> >> >
> >> > @@ -2323,6 +2324,7 @@ int       utf8_open(struct utf8_data *, u_char);
> >> >  int    utf8_append(struct utf8_data *, u_char);
> >> >  u_int  utf8_combine(const struct utf8_data *);
> >> >  u_int  utf8_split2(u_int, u_char *);
> >> > +int    utf8_strvis(char *, const char *, size_t, int);
> >> >
> >> >  /* osdep-*.c */
> >> >  char           *osdep_get_name(int, char *);
> >> > diff --git a/utf8.c b/utf8.c
> >> > index 63723d7..10200dd 100644
> >> > --- a/utf8.c
> >> > +++ b/utf8.c
> >> > @@ -352,3 +352,42 @@ utf8_width(const struct utf8_data *utf8data)
> >> >         }
> >> >         return (1);
> >> >  }
> >> > +
> >> > +/*
> >> > + * Encode len characters from src into dst, which is guaranteed to have 
> >> > four
> >> > + * bytes available for each character from src (for \abc or UTF-8).
> >> > + */
> >> > +int
> >> > +utf8_strvis(char *dst, const char *src, size_t len, int flag)
> >> > +{
> >> > +       struct utf8_data         utf8data;
> >> > +       char                    *start;
> >> > +       int                      more;
> >> > +       size_t                   i;
> >> > +
> >> > +       for (start = dst; (dst - start) < len;) {
> >> > +               if (utf8_open(&utf8data, *src)) {
> >> > +                       while ((dst - start) + utf8data.have < len) {
> >> > +                               src++;
> >> > +
> >> > +                               more = utf8_append(&utf8data, *src);
> >> > +                               if (!more)
> >> > +                                       break;
> >> > +                       }
> >> > +                       if (!more) {
> >> > +                               /* UTF-8 character finished. */
> >> > +                               src++;
> >> > +                               for (i = 0; i < utf8data.size; i++)
> >> > +                                       *dst++ = utf8data.data[i];
> >> > +                               continue;
> >> > +                       } else if (utf8data.have > 0) {
> >> > +                               /* Not a complete UTF-8 character. */
> >> > +                               src -= (utf8data.have - 1);
> >> > +                       }
> >> > +               }
> >> > +               dst = vis(dst, src[0], flag, src[1]);
> >> > +               src++;
> >> > +       }
> >> > +       *dst = '\0';
> >> > +       return (dst - start);
> >> > +}
> >> >
> >> >
> >> >
> >> > On Sat, Mar 08, 2014 at 09:06:02PM +0900, Kosuke ASAMI wrote:
> >> >>    >*Arguments the wrong way round?
> >> >>    Sorry, I have fixed it.
> >> >>    I have moved the method from compat/vis.c to utf8.c and renamed it.
> >> >>
> >> >>    2014-03-08 18:25 GMT+09:00 Nicholas Marriott
> >> >>    <[1][email protected]>:
> >> >>
> >> >>      Hi
> >> >>
> >> >>      vis.c is only built on some platforms so the new function would be
> >> >>      better in utf8.c as something like utf8_strvis.
> >> >>
> >> >>      Also this looks wrong:
> >> >>
> >> >>      - * * * * * * * format_paste_buffer(ft, pb);
> >> >>      + * * * * * * * format_paste_buffer(wp, ft, pb);
> >> >>
> >> >>      Arguments the wrong way round?
> >> >>
> >> >>      On Sat, Mar 08, 2014 at 06:15:25PM +0900, Kosuke ASAMI wrote:
> >> >>      >    I have implemented better UTF-8 support for buffer_sample in
> >> >>      >    choose-buffer and list-buffers.
> >> >>      >
> >> >>      >    When a user enables UTF-8 in the window options, choose-buffer and
> >> >>      >    list-buffers show UTF-8 characters. Otherwise, they behave as they
> >> >>      >    have until now.
> >> >>
> >> >>      >
> >> >>      
> >> >> ------------------------------------------------------------------------------
> >> >>      > Subversion Kills Productivity. Get off Subversion & Make the 
> >> >> Move to
> >> >>      Perforce.
> >> >>      > With Perforce, you get hassle-free workflows. Merge that actually
> >> >>      works.
> >> >>      > Faster operations. Version large binaries. *Built-in WAN 
> >> >> optimization
> >> >>      and the
> >> >>      > freedom to use Git, Perforce or both. Make the move to Perforce.
> >> >>      >
> >> >>      
> >> >> [2]http://pubads.g.doubleclick.net/gampad/clk?id=122218951&iu=/4140/ostg.clktrk
> >> >>
> >> >>      > _______________________________________________
> >> >>      > tmux-users mailing list
> >> >>      > [3][email protected]
> >> >>      > [4]https://lists.sourceforge.net/lists/listinfo/tmux-users
> >> >>
> >> >> References
> >> >>
> >> >>    Visible links
> >> >>    1. mailto:[email protected]
> >> >>    2. 
> >> >> http://pubads.g.doubleclick.net/gampad/clk?id=122218951&iu=/4140/ostg.clktrk
> >> >>    3. mailto:[email protected]
> >> >>    4. https://lists.sourceforge.net/lists/listinfo/tmux-users
> >> >
> >> >



------------------------------------------------------------------------------
Subversion Kills Productivity. Get off Subversion & Make the Move to Perforce.
With Perforce, you get hassle-free workflows. Merge that actually works. 
Faster operations. Version large binaries.  Built-in WAN optimization and the
freedom to use Git, Perforce or both. Make the move to Perforce.
http://pubads.g.doubleclick.net/gampad/clk?id=122218951&iu=/4140/ostg.clktrk
_______________________________________________
tmux-users mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/tmux-users

Re: [PATCH] better UTF-8 support for buffer_sample

Reply via email to