Jim Meyering wrote:
> I'm open to all reasonable solutions, especially when accompanied
> with sample code.
This is the proposed sample code: the 'expand' program. Here the core of the program is in the single function expand(). The proposed solution is like this. It uses a set of macros, which - in uppercase - are like the lowercase functions/macros that are already present in gnulib. (Attached to this mail.) The patch is relative to coreutils-6.11. The transformation of the code from using 'char' to using 'MBF_CHAR' took me less than half an hour. You could do the 'unexpand' program in another half an hour. 2008-04-22 Bruno Haible <[EMAIL PROTECTED]> Make 'expand' work in multibyte locales. * src/expandloop.h: New file, extracted from src/expand.c. * src/expand.c: Include expandloop.h twice. (expand): Dispatch between multibyte and unibyte locales. *** src/expand.c.bak 2008-04-19 23:34:23.000000000 +0200 --- src/expand.c 2008-04-22 03:53:20.000000000 +0200 *************** *** 267,367 **** /* Change tabs to spaces, writing to stdout. Read each file in `file_list', in order. */ static void expand (void) { ! /* Input stream. */ ! FILE *fp = next_file (NULL); ! ! if (!fp) ! return; ! ! for (;;) ! { ! /* Input character, or EOF. */ ! int c; ! ! /* If true, perform translations. */ ! bool convert = true; ! ! ! /* The following variables have valid values only when CONVERT ! is true: */ ! ! /* Column of next input character. */ ! uintmax_t column = 0; ! ! /* Index in TAB_LIST of next tab stop to examine. */ ! size_t tab_index = 0; ! ! ! /* Convert a line of text. */ ! ! do ! { ! while ((c = getc (fp)) < 0 && (fp = next_file (fp))) ! continue; ! ! if (convert) ! { ! if (c == '\t') ! { ! /* Column the next input tab stop is on. */ ! uintmax_t next_tab_column; ! ! if (tab_size) ! next_tab_column = column + (tab_size - column % tab_size); ! else ! for (;;) ! if (tab_index == first_free_tab) ! { ! next_tab_column = column + 1; ! break; ! } ! else ! { ! uintmax_t tab = tab_list[tab_index++]; ! if (column < tab) ! { ! next_tab_column = tab; ! break; ! } ! } ! ! 
if (next_tab_column < column) ! error (EXIT_FAILURE, 0, _("input line is too long")); ! ! while (++column < next_tab_column) ! if (putchar (' ') < 0) ! error (EXIT_FAILURE, errno, _("write error")); ! ! c = ' '; ! } ! else if (c == '\b') ! { ! /* Go back one column, and force recalculation of the ! next tab stop. */ ! column -= !!column; ! tab_index -= !!tab_index; ! } ! else ! { ! column++; ! if (!column) ! error (EXIT_FAILURE, 0, _("input line is too long")); ! } ! ! convert &= convert_entire_line | !! isblank (c); ! } ! ! if (c < 0) ! return; ! ! if (putchar (c) < 0) ! error (EXIT_FAILURE, errno, _("write error")); ! } ! while (c != '\n'); ! } } int --- 267,295 ---- /* Change tabs to spaces, writing to stdout. Read each file in `file_list', in order. */ + #if HAVE_MBRTOWC + # define FUNC expand_multi + # include "mbfile_multi.h" + # include "expandloop.h" + # include "mbfile_undef.h" + # undef FUNC + #endif + + #define FUNC expand_8bit + #include "mbfile_8bit.h" + #include "expandloop.h" + #include "mbfile_undef.h" + #undef FUNC + static void expand (void) { ! #if HAVE_MBRTOWC ! if (MB_CUR_MAX > 1) ! expand_multi (); ! else ! #endif ! expand_8bit (); } int *** /dev/null 2003-09-23 19:59:22.000000000 +0200 --- src/expandloop.h 2008-04-22 03:52:50.000000000 +0200 *************** *** 0 **** --- 1,119 ---- + /* Working loop for expand. + Copyright (C) 89, 91, 1995-2006, 2008 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+ /* Tab-expansion loop; the including file chooses the name via FUNC.  */
+ static void
+ FUNC (void)
+ {
+   /* Input stream.  */
+   FILE *fp = next_file (NULL);
+
+   if (!fp)
+     return;
+
+   for (;;)
+     {
+       MB_FILE mfp;
+
+       /* Input character, or EOF.  */
+       MBF_CHAR c;
+
+       /* If true, perform translations.  */
+       bool convert = true;
+
+
+       /* The following variables have valid values only when CONVERT
+          is true:  */
+
+       /* Column of next input character.  */
+       uintmax_t column = 0;
+
+       /* Index in TAB_LIST of next tab stop to examine.  */
+       size_t tab_index = 0;
+
+
+       MBF_INIT (mfp, fp);
+
+       /* Convert a line of text.  */
+
+       do
+         {
+           while ((MBF_GETC (c, mfp), MB_ISEOF (c)) && (fp = next_file (fp)))
+             continue;
+
+           if (convert)
+             {
+               if (MB_ISEQ (c, '\t'))
+                 {
+                   /* Column the next input tab stop is on.  */
+                   uintmax_t next_tab_column;
+
+                   if (tab_size)
+                     next_tab_column = column + (tab_size - column % tab_size);
+                   else
+                     for (;;)
+                       if (tab_index == first_free_tab)
+                         {
+                           next_tab_column = column + 1;
+                           break;
+                         }
+                       else
+                         {
+                           uintmax_t tab = tab_list[tab_index++];
+                           if (column < tab)
+                             {
+                               next_tab_column = tab;
+                               break;
+                             }
+                         }
+
+                   if (next_tab_column < column)
+                     error (EXIT_FAILURE, 0, _("input line is too long"));
+
+                   while (++column < next_tab_column)
+                     if (putchar (' ') < 0)
+                       error (EXIT_FAILURE, errno, _("write error"));
+
+                   MB_SETASCII (c, ' ');
+                 }
+               else if (MB_ISEQ (c, '\b'))
+                 {
+                   /* Go back one column, and force recalculation of the
+                      next tab stop.  */
+                   column -= !!column;
+                   tab_index -= !!tab_index;
+                 }
+               else
+                 {
+                   uintmax_t new_column = column + MB_WIDTH (c);
+
+                   if (new_column < column)
+                     error (EXIT_FAILURE, 0, _("input line is too long"));
+                   column = new_column;
+                 }
+
+               convert &= convert_entire_line | !! MB_ISBLANK (c);
+             }
+
+           if (MB_ISEOF (c))
+             return;
+
+           MB_PUTC (c, stdout);
+           if (ferror (stdout))  /* nonzero, not negative, signals error */
+             error (EXIT_FAILURE, errno, _("write error"));
+         }
+       while (! MB_ISEQ (c, '\n'));
+     }
+ }
mb_switched.tar.gz
Description: application/tgz
_______________________________________________ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils