Re: [PATCH] better UTF-8 support for buffer_sample

Kosuke ASAMI Sun, 09 Mar 2014 08:49:16 -0700

The initial zero check of 'len' seems to be unnecessary.

I have fixed the amount by which 'src' is decremented when a UTF-8 character
is incomplete.


2014-03-09 18:16 GMT+09:00 Nicholas Marriott <[email protected]>:
> Actually I think this function is wrong because it reads over the end of
> src. len is the length of src, not dst. We know there is always enough
> space in dst.
>
> I haven't time now to look at this closely or test it but maybe
> something like:
>
> diff --git a/utf8.c b/utf8.c
> index 63723d7..b1f44bf 100644
> --- a/utf8.c
> +++ b/utf8.c
> @@ -352,3 +352,51 @@ utf8_width(const struct utf8_data *utf8data)
>         }
>         return (1);
>  }
> +
> +/*
> + * Encode len characters from src into dst, which is guaranteed to have four
> + * bytes available for each character from src (for \abc or UTF-8) plus space
> + * for \0.
> + */
> +int
> +utf8_strvis(char *dst, const char *src, size_t len, int flag)
> +{
> +       struct utf8_data         utf8data;
> +       const char              *start, *end;
> +       int                      more;
> +       size_t                   i;
> +
> +       if (len == 0) {
> +               *dst = '\0';
> +               return (0);
> +       }
> +
> +       start = dst;
> +       end = src + len;
> +
> +       while (src < end) {
> +               if (utf8_open(&utf8data, *src)) {
> +                       more = 1;
> +                       while (++src < end && more)
> +                               more = utf8_append(&utf8data, *src);
> +                       if (!more) {
> +                               /* UTF-8 character finished. */
> +                               for (i = 0; i < utf8data.size; i++)
> +                                       *dst++ = utf8data.data[i];
> +                               continue;
> +                       } else if (utf8data.have > 0) {
> +                               /* Not a complete UTF-8 character. */
> +                               src -= (utf8data.have - 1);
> +                       }
> +               }
> +               if (src < end - 1)
> +                       dst = vis(dst, src[0], flag, src[1]);
> +               else if (src < end)
> +                       dst = vis(dst, src[0], flag, '\0');
> +               src++;
> +
> +       }
> +
> +       *dst = '\0';
> +       return (dst - start);
> +}
>
>
>
> On Sun, Mar 09, 2014 at 10:30:39AM +0900, Kosuke ASAMI wrote:
>> I have tested, and it works fine!
>>
>> However, there is a mistake in my first patch.
>> The flag needs to be cleared before the next iteration (as in the following diff).
>>
>> I have attached the complete patch.
>>
>> ----------
>>
>> diff --git a/utf8.c b/utf8.c
>> index 10200dd..0465a32 100644
>> --- a/utf8.c
>> +++ b/utf8.c
>> @@ -379,6 +379,7 @@ utf8_strvis(char *dst, const char *src, size_t
>> len, int flag)
>>   src++;
>>   for (i = 0; i < utf8data.size; i++)
>>   *dst++ = utf8data.data[i];
>> + more = -1;
>>   continue;
>>   } else if (utf8data.have > 0) {
>>   /* Not a complete UTF-8 character. */
>>
>> 2014-03-09 1:51 GMT+09:00 Nicholas Marriott <[email protected]>:
>> > Hi
>> >
>> > Here's it with a little tidying up. I didn't test this but can you take
>> > a look?
>> >
>> >
>> > diff --git a/cmd-choose-buffer.c b/cmd-choose-buffer.c
>> > index d79f6fd..a46db3e 100644
>> > --- a/cmd-choose-buffer.c
>> > +++ b/cmd-choose-buffer.c
>> > @@ -45,6 +45,7 @@ cmd_choose_buffer_exec(struct cmd *self, struct cmd_q 
>> > *cmdq)
>> >         struct client                   *c;
>> >         struct window_choose_data       *cdata;
>> >         struct winlink                  *wl;
>> > +       struct window_pane              *wp;
>> >         struct paste_buffer             *pb;
>> >         char                            *action, *action_data;
>> >         const char                      *template;
>> > @@ -61,6 +62,9 @@ cmd_choose_buffer_exec(struct cmd *self, struct cmd_q 
>> > *cmdq)
>> >         if ((wl = cmd_find_window(cmdq, args_get(args, 't'), NULL)) == 
>> > NULL)
>> >                 return (CMD_RETURN_ERROR);
>> >
>> > +       if (cmd_find_pane(cmdq, args_get(args, 't'), NULL, &wp) == NULL)
>> > +               return (CMD_RETURN_ERROR);
>> > +
>> >         if (paste_get_top(&global_buffers) == NULL)
>> >                 return (CMD_RETURN_NORMAL);
>> >
>> > @@ -79,7 +83,7 @@ cmd_choose_buffer_exec(struct cmd *self, struct cmd_q 
>> > *cmdq)
>> >
>> >                 cdata->ft_template = xstrdup(template);
>> >                 format_add(cdata->ft, "line", "%u", idx - 1);
>> > -               format_paste_buffer(cdata->ft, pb);
>> > +               format_paste_buffer(cdata->ft, wp, pb);
>> >
>> >                 xasprintf(&action_data, "%u", idx - 1);
>> >                 cdata->command = cmd_template_replace(action, action_data, 
>> > 1);
>> > diff --git a/cmd-list-buffers.c b/cmd-list-buffers.c
>> > index 02a4183..6a9fb5e 100644
>> > --- a/cmd-list-buffers.c
>> > +++ b/cmd-list-buffers.c
>> > @@ -42,12 +42,16 @@ enum cmd_retval
>> >  cmd_list_buffers_exec(unused struct cmd *self, struct cmd_q *cmdq)
>> >  {
>> >         struct args             *args = self->args;
>> > +       struct window_pane      *wp;
>> >         struct paste_buffer     *pb;
>> >         struct format_tree      *ft;
>> >         u_int                    idx;
>> >         char                    *line;
>> >         const char              *template;
>> >
>> > +       if (cmd_find_pane(cmdq, args_get(args, 't'), NULL, &wp) == NULL)
>> > +               return (CMD_RETURN_ERROR);
>> > +
>> >         if ((template = args_get(args, 'F')) == NULL)
>> >                 template = LIST_BUFFERS_TEMPLATE;
>> >
>> > @@ -55,7 +59,7 @@ cmd_list_buffers_exec(unused struct cmd *self, struct 
>> > cmd_q *cmdq)
>> >         while ((pb = paste_walk_stack(&global_buffers, &idx)) != NULL) {
>> >                 ft = format_create();
>> >                 format_add(ft, "line", "%u", idx - 1);
>> > -               format_paste_buffer(ft, pb);
>> > +               format_paste_buffer(ft, wp, pb);
>> >
>> >                 line = format_expand(ft, template);
>> >                 cmdq_print(cmdq, "%s", line);
>> > diff --git a/format.c b/format.c
>> > index 497b5b5..05cd4ed 100644
>> > --- a/format.c
>> > +++ b/format.c
>> > @@ -603,9 +603,10 @@ format_window_pane(struct format_tree *ft, struct 
>> > window_pane *wp)
>> >
>> >  /* Set default format keys for paste buffer. */
>> >  void
>> > -format_paste_buffer(struct format_tree *ft, struct paste_buffer *pb)
>> > +format_paste_buffer(struct format_tree *ft, struct window_pane *wp,
>> > +    struct paste_buffer *pb)
>> >  {
>> > -       char    *pb_print = paste_print(pb, 50);
>> > +       char    *pb_print = paste_print(pb, wp, 50);
>> >
>> >         format_add(ft, "buffer_size", "%zu", pb->size);
>> >         format_add(ft, "buffer_sample", "%s", pb_print);
>> > diff --git a/paste.c b/paste.c
>> > index 28f1230..6101195 100644
>> > --- a/paste.c
>> > +++ b/paste.c
>> > @@ -149,23 +149,26 @@ paste_replace(struct paste_stack *ps, u_int idx, 
>> > char *data, size_t size)
>> >
>> >  /* Convert a buffer into a visible string. */
>> >  char *
>> > -paste_print(struct paste_buffer *pb, size_t width)
>> > +paste_print(struct paste_buffer *pb, struct window_pane *wp, size_t width)
>> >  {
>> > -       char    *buf;
>> > -       size_t   len, used;
>> > +       char            *buf;
>> > +       size_t           len, used;
>> > +       const int        flags = VIS_OCTAL|VIS_TAB|VIS_NL;
>> >
>> >         if (width < 3)
>> >                 width = 3;
>> > -       buf = xmalloc(width * 4 + 1);
>> > +       buf = xmalloc(width * 4 + 4); /* four bytes each plus "...\0" */
>> >
>> >         len = pb->size;
>> >         if (len > width)
>> >                 len = width;
>> >
>> > -       used = strvisx(buf, pb->data, len, VIS_OCTAL|VIS_TAB|VIS_NL);
>> > +       if (options_get_number(&wp->window->options, "utf8"))
>> > +               used = utf8_strvis(buf, pb->data, len, flags);
>> > +       else
>> > +               used = strvisx(buf, pb->data, len, flags);
>> >         if (pb->size > width || used > width)
>> > -               strlcpy(buf + width - 3, "...", 4);
>> > -
>> > +               strlcpy(buf + width, "...", 4);
>> >         return (buf);
>> >  }
>> >
>> > diff --git a/tmux.h b/tmux.h
>> > index 5aac390..195c955 100644
>> > --- a/tmux.h
>> > +++ b/tmux.h
>> > @@ -1540,7 +1540,7 @@ void               format_winlink(struct format_tree 
>> > *, struct session *,
>> >                      struct winlink *);
>> >  void            format_window_pane(struct format_tree *,
>> >                      struct window_pane *);
>> > -void            format_paste_buffer(struct format_tree *,
>> > +void            format_paste_buffer(struct format_tree *, struct 
>> > window_pane *,
>> >                      struct paste_buffer *);
>> >
>> >  /* mode-key.c */
>> > @@ -1711,7 +1711,8 @@ int                paste_free_top(struct paste_stack 
>> > *);
>> >  int             paste_free_index(struct paste_stack *, u_int);
>> >  void            paste_add(struct paste_stack *, char *, size_t, u_int);
>> >  int             paste_replace(struct paste_stack *, u_int, char *, 
>> > size_t);
>> > -char           *paste_print(struct paste_buffer *, size_t);
>> > +char           *paste_print(struct paste_buffer *, struct window_pane *,
>> > +                    size_t);
>> >  void            paste_send_pane(struct paste_buffer *, struct window_pane 
>> > *,
>> >                      const char *, int);
>> >
>> > @@ -2323,6 +2324,7 @@ int       utf8_open(struct utf8_data *, u_char);
>> >  int    utf8_append(struct utf8_data *, u_char);
>> >  u_int  utf8_combine(const struct utf8_data *);
>> >  u_int  utf8_split2(u_int, u_char *);
>> > +int    utf8_strvis(char *, const char *, size_t, int);
>> >
>> >  /* osdep-*.c */
>> >  char           *osdep_get_name(int, char *);
>> > diff --git a/utf8.c b/utf8.c
>> > index 63723d7..10200dd 100644
>> > --- a/utf8.c
>> > +++ b/utf8.c
>> > @@ -352,3 +352,42 @@ utf8_width(const struct utf8_data *utf8data)
>> >         }
>> >         return (1);
>> >  }
>> > +
>> > +/*
>> > + * Encode len characters from src into dst, which is guaranteed to have 
>> > four
>> > + * bytes available for each character from src (for \abc or UTF-8).
>> > + */
>> > +int
>> > +utf8_strvis(char *dst, const char *src, size_t len, int flag)
>> > +{
>> > +       struct utf8_data         utf8data;
>> > +       char                    *start;
>> > +       int                      more;
>> > +       size_t                   i;
>> > +
>> > +       for (start = dst; (dst - start) < len;) {
>> > +               if (utf8_open(&utf8data, *src)) {
>> > +                       while ((dst - start) + utf8data.have < len) {
>> > +                               src++;
>> > +
>> > +                               more = utf8_append(&utf8data, *src);
>> > +                               if (!more)
>> > +                                       break;
>> > +                       }
>> > +                       if (!more) {
>> > +                               /* UTF-8 character finished. */
>> > +                               src++;
>> > +                               for (i = 0; i < utf8data.size; i++)
>> > +                                       *dst++ = utf8data.data[i];
>> > +                               continue;
>> > +                       } else if (utf8data.have > 0) {
>> > +                               /* Not a complete UTF-8 character. */
>> > +                               src -= (utf8data.have - 1);
>> > +                       }
>> > +               }
>> > +               dst = vis(dst, src[0], flag, src[1]);
>> > +               src++;
>> > +       }
>> > +       *dst = '\0';
>> > +       return (dst - start);
>> > +}
>> >
>> >
>> >
>> > On Sat, Mar 08, 2014 at 09:06:02PM +0900, Kosuke ASAMI wrote:
>> >>    >*Arguments the wrong way round?
>> >>    Sorry, I have fixed it.
>> >>    I have moved the method from compat/vis.c to utf8.c and renamed it.
>> >>
>> >>    2014-03-08 18:25 GMT+09:00 Nicholas Marriott
>> >>    <[1][email protected]>:
>> >>
>> >>      Hi
>> >>
>> >>      vis.c is only built on some platforms so the new function would be
>> >>      better in utf8.c as something like utf8_strvis.
>> >>
>> >>      Also this looks wrong:
>> >>
>> >>      - * * * * * * * format_paste_buffer(ft, pb);
>> >>      + * * * * * * * format_paste_buffer(wp, ft, pb);
>> >>
>> >>      Arguments the wrong way round?
>> >>
>> >>      On Sat, Mar 08, 2014 at 06:15:25PM +0900, Kosuke ASAMI wrote:
>> >>      >    I have implemented better UTF-8 support for buffer_sample in
>> >>      >    choose-buffer and list-buffers.
>> >>      >
>> >>      >    When a user enables UTF-8 in the window options, choose-buffer and
>> >>      >    list-buffers show UTF-8 characters as such. Otherwise, they behave as
>> >>      >    they have until now.
>> >>
>> >>      >
>> >>      
>> >> ------------------------------------------------------------------------------
>> >>      > Subversion Kills Productivity. Get off Subversion & Make the Move 
>> >> to
>> >>      Perforce.
>> >>      > With Perforce, you get hassle-free workflows. Merge that actually
>> >>      works.
>> >>      > Faster operations. Version large binaries. *Built-in WAN 
>> >> optimization
>> >>      and the
>> >>      > freedom to use Git, Perforce or both. Make the move to Perforce.
>> >>      >
>> >>      
>> >> [2]http://pubads.g.doubleclick.net/gampad/clk?id=122218951&iu=/4140/ostg.clktrk
>> >>
>> >>      > _______________________________________________
>> >>      > tmux-users mailing list
>> >>      > [3][email protected]
>> >>      > [4]https://lists.sourceforge.net/lists/listinfo/tmux-users
>> >>
>> >> References
>> >>
>> >>    Visible links
>> >>    1. mailto:[email protected]
>> >>    2. 
>> >> http://pubads.g.doubleclick.net/gampad/clk?id=122218951&iu=/4140/ostg.clktrk
>> >>    3. mailto:[email protected]
>> >>    4. https://lists.sourceforge.net/lists/listinfo/tmux-users
>> >
>> >

utf8_buffers_4.patch
Description: Binary data

------------------------------------------------------------------------------
Subversion Kills Productivity. Get off Subversion & Make the Move to Perforce.
With Perforce, you get hassle-free workflows. Merge that actually works. 
Faster operations. Version large binaries.  Built-in WAN optimization and the
freedom to use Git, Perforce or both. Make the move to Perforce.
http://pubads.g.doubleclick.net/gampad/clk?id=122218951&iu=/4140/ostg.clktrk

_______________________________________________
tmux-users mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/tmux-users

Re: [PATCH] better UTF-8 support for buffer_sample

Reply via email to