On 12.02.2012 22:59, Bruno Haible wrote:
> Hi Vladimir,
>
> Thank you for the proposed patch.
>
>> As already reported several years ago
> I cannot find it in my archives. Maybe that discussion already contained
> some useful thoughts or arguments? Can you please point me to it?
http://lists.gnu.org/archive/html/bug-tar/2009-09/msg00008.html
>> argp counts bytes even when
>> actually what matters is the display length. This patch improves the
>> situation by counting only leading and standalone UTF-8 bytes. It
>> doesn't handle the double-width characters like Chinese sinograms
> A program that needs to consider display length - for example for
> line wrapping - should
>   1) work with any locale encoding. Don't assume that the locale encoding
>      is UTF-8.
>   2) work with Chinese ideographs correctly, like it should also work
>      with Russian (single-width) letters.
Here you go. Tested on Cyrillic. Haven't tested with Chinese.
>
> The easiest way to satisfy these two requirements is to base the code on
> either
>   * the function mbswidth (gnulib module mbswidth) and possibly also mbiter
>     or mbuiter, or
>   * the gnulib module unilbrk/ulc-width-linebreaks, which contains a
>     complete line-breaking algorithm.
>
> Can you rewrite your patch to this effect?
>
> Also, such tricky issues should be checked in the test suite. Can you
> please also provide a test program, some input data, and the expected
> output for this data? We can then turn it into a gnulib test.
I can strip down one of our programs to keep just --help when time permits.
> Bruno
>
>


-- 
Regards
Vladimir 'φ-coder/phcoder' Serbinenko

=== modified file 'grub-core/gnulib/argp-fmtstream.c'
--- grub-core/gnulib/argp-fmtstream.c	2010-04-02 22:45:01 +0000
+++ grub-core/gnulib/argp-fmtstream.c	2012-02-12 22:37:14 +0000
@@ -29,6 +29,7 @@
 #include <errno.h>
 #include <stdarg.h>
 #include <ctype.h>
+#include <wchar.h>
 
 #include "argp-fmtstream.h"
 #include "argp-namefrob.h"
@@ -116,6 +117,88 @@
 #endif
 #endif
 
+
+/* Decode one multibyte character from [PTR, END) in the current locale,
+   using and updating the conversion state PS, and store the number of
+   terminal columns it occupies in *WIDTH.  Return the number of bytes the
+   character spans; this is always at least 1 so callers make progress.
+
+   Bytes that cannot be decoded, and characters for which wcwidth reports
+   no width (it returns -1), count as one column, which matches what plain
+   byte counting yields for single-byte text.  */
+static size_t
+argp_char_width (const char *ptr, const char *end, mbstate_t *ps,
+		 size_t *width)
+{
+  wchar_t wc;
+  size_t s = mbrtowc (&wc, ptr, end - ptr, ps);
+  int w;
+
+  if (s == (size_t) -1 || s == (size_t) -2)
+    {
+      /* (size_t) -1 is an invalid sequence: skip a single byte so decoding
+	 can resynchronize.  (size_t) -2 is a character cut off by END:
+	 consume the whole remainder.  WC is not set in either case, and
+	 the state is reset so that it is not reused.  */
+      memset (ps, 0, sizeof (*ps));
+      *width = 1;
+      return s == (size_t) -1 ? 1 : (size_t) (end - ptr);
+    }
+
+  w = wcwidth (wc);
+  *width = w < 0 ? 1 : (size_t) w;
+
+  /* mbrtowc returns 0 for a null character, which still occupies a byte.  */
+  return s == 0 ? 1 : s;
+}
+
+/* Return the display width, in terminal columns, of the multibyte text in
+   [BEG, END).  Unlike END - BEG this counts a multi-byte character once and
+   a double-width character twice.  */
+size_t
+__argp_get_display_len (char *beg, char *end)
+{
+  char *ptr = beg;
+  size_t r = 0;
+  mbstate_t ps;
+
+  memset (&ps, 0, sizeof (ps));
+
+  while (ptr < end)
+    {
+      size_t width;
+
+      ptr += argp_char_width (ptr, end, &ps, &width);
+      r += width;
+    }
+  return r;
+}
+
+/* Advance from PTR over whole characters, never past END, for as long as
+   the next character is narrower than the L columns still available, and
+   return the position reached.  The result therefore points at the
+   character that would use up the last of the L columns, or equals END if
+   the text is too short for that.  */
+static inline char *
+add_length (char *ptr, char *end, size_t l)
+{
+  mbstate_t ps;
+
+  memset (&ps, 0, sizeof (ps));
+
+  while (ptr < end)
+    {
+      size_t width;
+      size_t s = argp_char_width (ptr, end, &ps, &width);
+
+      if (width >= l)
+	break;
+      l -= width;
+      ptr += s;
+    }
+  return ptr;
+}
+
 /* Process FS's buffer so that line wrapping is done from POINT_OFFS to the
    end of its buffer.  This code is mostly from glibc stdio/linewrap.c.  */
 void
@@ -168,14 +217,15 @@
 
       if (!nl)
         {
+	  size_t display_len = __argp_get_display_len (buf, fs->p);
           /* The buffer ends in a partial line.  */
 
-          if (fs->point_col + len < fs->rmargin)
+          if (fs->point_col + display_len < fs->rmargin)
             {
               /* The remaining buffer text is a partial line and fits
                  within the maximum line width.  Advance point for the
                  characters to be written and stop scanning.  */
-              fs->point_col += len;
+              fs->point_col += display_len;
               break;
             }
           else
@@ -183,14 +233,18 @@
                the end of the buffer.  */
             nl = fs->p;
         }
-      else if (fs->point_col + (nl - buf) < (ssize_t) fs->rmargin)
-        {
-          /* The buffer contains a full line that fits within the maximum
-             line width.  Reset point and scan the next line.  */
-          fs->point_col = 0;
-          buf = nl + 1;
-          continue;
-        }
+      else
+	{
+	  size_t display_len = __argp_get_display_len (buf, nl);
+	  if (display_len < (ssize_t) fs->rmargin)
+	    {
+	      /* The buffer contains a full line that fits within the maximum
+		 line width.  Reset point and scan the next line.  */
+	      fs->point_col = 0;
+	      buf = nl + 1;
+	      continue;
+	    }
+	}
 
       /* This line is too long.  */
       r = fs->rmargin - 1;
@@ -226,7 +280,7 @@
           char *p, *nextline;
           int i;
 
-          p = buf + (r + 1 - fs->point_col);
+	  p = add_length (buf, fs->p, (r + 1 - fs->point_col));
           while (p >= buf && !isblank ((unsigned char) *p))
             --p;
           nextline = p + 1;     /* This will begin the next line.  */
@@ -244,7 +298,7 @@
             {
               /* A single word that is greater than the maximum line width.
                  Oh well.  Put it on an overlong line by itself.  */
-              p = buf + (r + 1 - fs->point_col);
+              p = add_length (buf, fs->p, (r + 1 - fs->point_col));
               /* Find the end of the long word.  */
               if (p < nl)
                 do
@@ -278,7 +332,7 @@
               && fs->p > nextline)
             {
               /* The margin needs more blanks than we removed.  */
-              if (fs->end - fs->p > fs->wmargin + 1)
+              if (__argp_get_display_len (fs->p, fs->end) > fs->wmargin + 1)
                 /* Make some space for them.  */
                 {
                   size_t mv = fs->p - nextline;

=== modified file 'grub-core/gnulib/argp-fmtstream.h'
--- grub-core/gnulib/argp-fmtstream.h	2010-04-02 22:45:01 +0000
+++ grub-core/gnulib/argp-fmtstream.h	2012-02-12 22:18:14 +0000
@@ -335,6 +335,9 @@
   return __fs->point_col >= 0 ? __fs->point_col : 0;
 }
 
+size_t
+__argp_get_display_len (char *beg, char *end);
+
 #if !_LIBC
 #undef __argp_fmtstream_putc
 #undef __argp_fmtstream_puts

=== modified file 'grub-core/gnulib/argp-help.c'
--- grub-core/gnulib/argp-help.c	2010-04-02 22:45:01 +0000
+++ grub-core/gnulib/argp-help.c	2012-02-12 22:19:05 +0000
@@ -1448,7 +1448,7 @@
 
       /* Manually do line wrapping so that it (probably) won't get wrapped at
          any embedded spaces.  */
-      space (stream, 1 + nl - cp);
+      space (stream, 1 + __argp_get_display_len (cp, nl));
 
       __argp_fmtstream_write (stream, cp, nl - cp);
     }

Attachment: signature.asc
Description: OpenPGP digital signature

Reply via email to