Currently fmt assumes that 1 byte = 1 column, which produces wrongly
formatted strings. The attached patch fixes it.

-- 
Regards
Vladimir 'φ-coder/phcoder' Serbinenko

diff --git a/src/fmt.c b/src/fmt.c
index 89d13a6..56f7c0b 100644
--- a/src/fmt.c
+++ b/src/fmt.c
@@ -20,6 +20,7 @@
 #include <stdio.h>
 #include <sys/types.h>
 #include <getopt.h>
+#include <wchar.h>
 
 /* Redefine.  Otherwise, systems (Unicos for one) with headers that define
    it to be a type get syntax errors for the variable declaration below.  */
@@ -135,6 +136,7 @@ struct Word
 
     const char *text;		/* the text of the word */
     int length;			/* length of this word */
+    int width;
     int space;			/* the size of the following space */
     unsigned int paren:1;	/* starts with open paren */
     unsigned int period:1;	/* ends in [.?!])* */
@@ -259,6 +261,42 @@ static int next_prefix_indent;
    paragraphs chosen by fmt_paragraph().  */
 static int last_line_length;
 
+/* Return the display width, in columns, of the bytes in [BEG, END).
+   "ESC [ 0 m" and "ESC [ 1 m" count as zero columns, an undecodable
+   byte as one column, and a non-printable character as zero.  */
+static size_t
+get_display_width (const char *beg, const char *end)
+{
+  const char *ptr;
+  size_t r = 0;
+  mbstate_t ps;
+
+  memset (&ps, 0, sizeof (ps));
+  for (ptr = beg; ptr < end && *ptr; )	/* Bound first: never read *END.  */
+    {
+      wchar_t wc;
+      size_t s = mbrtowc (&wc, ptr, end - ptr, &ps);
+      int w;
+
+      if (s == (size_t) -1 || s == (size_t) -2)
+	{
+	  /* Discard the failed or partial state; resync on the next byte.  */
+	  memset (&ps, 0, sizeof (ps));
+	  ptr++;
+	  r++;
+	  continue;
+	}
+      if (wc == L'\033' && ptr + 3 < end
+	  && ptr[1] == '[' && (ptr[2] == '0' || ptr[2] == '1')
+	  && ptr[3] == 'm')
+	s = 4;			/* Skip the whole escape sequence.  */
+      else if ((w = wcwidth (wc)) > 0)	/* -1 means non-printable.  */
+	r += w;
+      ptr += s;
+    }
+  return r;
+}
+
 void
 usage (int status)
 {
@@ -669,7 +707,9 @@ get_line (FILE *f, int c)
           c = getc (f);
         }
       while (c != EOF && !isspace (c));
-      in_column += word_limit->length = wptr - word_limit->text;
+      word_limit->length = wptr - word_limit->text;
+      in_column += word_limit->width = get_display_width (word_limit->text,
+							  wptr);
       check_punctuation (word_limit);
 
       /* Scan inter-word space.  */
@@ -871,13 +911,13 @@ fmt_paragraph (void)
           if (w == word_limit)
             break;
 
-          len += (w - 1)->space + w->length;	/* w > start >= word */
+          len += (w - 1)->space + w->width;	/* w > start >= word */
         }
       while (len < max_width);
       start->best_cost = best + base_cost (start);
     }
 
-  word_limit->length = saved_length;
+  word_limit->width = saved_length;
 }
 
 /* Return the constant component of the cost of breaking before the
@@ -902,13 +942,13 @@ base_cost (WORD *this)
       else if ((this - 1)->punct)
         cost -= PUNCT_BONUS;
       else if (this > word + 1 && (this - 2)->final)
-        cost += WIDOW_COST ((this - 1)->length);
+        cost += WIDOW_COST ((this - 1)->width);
     }
 
   if (this->paren)
     cost -= PAREN_BONUS;
   else if (this->final)
-    cost += ORPHAN_COST (this->length);
+    cost += ORPHAN_COST (this->width);
 
   return cost;
 }
@@ -983,7 +1023,7 @@ put_word (WORD *w)
   s = w->text;
   for (n = w->length; n != 0; n--)
     putchar (*s++);
-  out_column += w->length;
+  out_column += w->width;
 }
 
 /* Output to stdout SPACE spaces, or equivalent tabs.  */

Attachment: signature.asc
Description: OpenPGP digital signature

Reply via email to