Update of bug #66051 (group groff): Status: None => Postponed
_______________________________________________________ Follow-up Comment #7: Rut-roh, Raggy. So, yeah, let's put in the missing bits. diff --git a/src/roff/troff/input.cpp b/src/roff/troff/input.cpp index 44e0981f6..6d9ee85c6 100644 --- a/src/roff/troff/input.cpp +++ b/src/roff/troff/input.cpp @@ -7275,12 +7275,15 @@ static void set_character_flags() skip_line(); } +// We use this to mint hyphenation codes for special characters. +static int hyphenation_code_counter = 256; + static void set_hyphenation_codes() { tok.skip(); if (tok.is_newline() || tok.is_eof()) { - warning(WARN_MISSING, "hyphenation code configuration request" - " expects arguments"); + warning(WARN_MISSING, "hyphenation code assignment request expects" + " arguments"); skip_line(); return; } @@ -7309,24 +7312,38 @@ static void set_hyphenation_codes() error("cannot use the hyphenation code of a numeral"); break; } - unsigned char new_code = 0; // TODO: int + int new_code = 0; charinfo *cisrc = tok.get_char(); - if (csrc != 0) - new_code = csrc; - else { + if (cisrc != 0 /* nullptr */) + // Common case: assign destination character the hyphenation code + // of another character that already has one. 
+ new_code = cisrc->get_hyphenation_code(); + if (0 == csrc) { if (0 /* nullptr */ == cisrc) { error("expected ordinary or special character, got %1", tok.description()); break; } break; } // source character is special - if (0 == cisrc->get_hyphenation_code()) { - error("second member of hyphenation code pair must be an" - " ordinary character, or a special character already" - " assigned a hyphenation code"); - break; + if (strcmp(cidst->nm.contents(), cisrc->nm.contents()) == 0) { + debug("GBR: mint new"); + if (hyphenation_code_counter == INT_MAX) { + error("supply of hyphenation codes exhausted"); + break; + } + new_code = hyphenation_code_counter++; + debug("GBR: new code is %1", int(new_code)); } - new_code = cisrc->get_hyphenation_code(); + else { + debug("GBR: copy code"); + new_code = cisrc->get_hyphenation_code(); + } + } + else { + // If assigning a code to itself, use its character encoding value + // to mint a new code. + if (csrc == cdst) + new_code = tok.ch(); } cidst->set_hyphenation_code(new_code); if (cidst->get_translation() No problem. Compiles fine. Let's test it. $ nl EXPERIMENTS/phcode-works.groff 1 .phcode a 2 .phcode A 3 .phcode $ 4 .tm invoking .hcode \['a] $ 5 .hcode \['a] $ 6 .phcode \['a] 7 .tm invoking .hcode \['a] \['a] 8 .hcode \['a] \['a] 9 .phcode \['a] 10 .tm invoking .hcode \['a] \['A] 11 .hcode \['a] \['A] 12 .phcode \['a] 13 .phcode \[vS] $ ./build/test-groff EXPERIMENTS/phcode-works.groff a 97 A 97 $ 0 invoking .hcode \['a] $ \['a] 0 invoking .hcode \['a] \['a] troff:EXPERIMENTS/phcode-works.groff:8: debug: GBR: mint new troff:EXPERIMENTS/phcode-works.groff:8: debug: GBR: new code is 256 \['a] 0 invoking .hcode \['a] \['A] troff:EXPERIMENTS/phcode-works.groff:11: debug: GBR: copy code \['a] 225 \[vS] 0 "new code is 256"--great! Next line... \['a] 0 Wha..? Well, yeah. 
The hyphenation code is an `unsigned char`, so when we assigned an `int` to it, the upper bits got masked off, and this isn't warned about because pfffffffft, we're C/C++ programmers--we live close to the metal where no one ever worries about narrowing type conversions. 😐 All right, fine, a hyphenation code is just a numeric type. Let's widen it. diff --git a/src/roff/troff/charinfo.h b/src/roff/troff/charinfo.h index 02be5aa95..6b05c1dc2 100644 --- a/src/roff/troff/charinfo.h +++ b/src/roff/troff/charinfo.h @@ -76,7 +76,7 @@ public: int prohibit_break_before(); int prohibit_break_after(); int inter_char_space(); - unsigned char get_hyphenation_code(); + int get_hyphenation_code(); unsigned char get_ascii_code(); unsigned char get_asciify_code(); int get_unicode_code(); @@ -212,7 +212,7 @@ inline charinfo *charinfo::get_translation(int transparent_throughput) : translation); } -inline unsigned char charinfo::get_hyphenation_code() +inline int charinfo::get_hyphenation_code() { return hyphenation_code; } diff --git a/src/roff/troff/env.cpp b/src/roff/troff/env.cpp index ca69e38fa..8d80ad2ba 100644 --- a/src/roff/troff/env.cpp +++ b/src/roff/troff/env.cpp @@ -3634,7 +3634,7 @@ static void add_hyphenation_exceptions() skip_line(); return; } - char buf[WORD_MAX + 1]; + int buf[WORD_MAX + 1]; unsigned char pos[WORD_MAX + 2]; for (;;) { tok.skip(); @@ -3655,7 +3655,7 @@ static void add_hyphenation_exceptions() pos[npos++] = i; } else { - unsigned char c = ci->get_hyphenation_code(); + int c = ci->get_hyphenation_code(); if (c == 0) break; buf[i++] = c; @@ -3664,9 +3664,9 @@ static void add_hyphenation_exceptions() if (i > 0) { pos[npos] = 0; buf[i] = 0; - unsigned char *tem = new unsigned char[npos + 1]; + int *tem = new int[npos + 1]; memcpy(tem, pos, npos + 1); - tem = (unsigned char *)current_language->exceptions.lookup(symbol(buf), + tem = (int *)current_language->exceptions.lookup(symbol(buf), tem); if (tem) delete[] tem; Easy-peasy. Now...
$ make -C build troff make: Entering directory '/home/branden/src/GIT/groff/build' CXX src/roff/troff/env.o ../src/roff/troff/env.cpp: In function ‘void add_hyphenation_exceptions()’: ../src/roff/troff/env.cpp:3669:66: error: no matching function for call to ‘symbol::symbol(int [257])’ 3669 | tem = (int *)current_language->exceptions.lookup(symbol(buf), | ^ In file included from ../src/include/color.h:22, from ../src/roff/troff/troff.h:32, from ../src/roff/troff/env.cpp:19: ../src/include/symbol.h:46:8: note: candidate: ‘symbol::symbol()’ 46 | inline symbol::symbol() : s(0) | ^~~~~~ ../src/include/symbol.h:46:8: note: candidate expects 0 arguments, 1 provided ../src/include/symbol.h:32:3: note: candidate: ‘symbol::symbol(const char*, int)’ 32 | symbol(const char *p, int how = 0); | ^~~~~~ ../src/include/symbol.h:32:22: note: no known conversion for argument 1 from ‘int [257]’ to ‘const char*’ 32 | symbol(const char *p, int how = 0); | ~~~~~~~~~~~~^ ../src/include/symbol.h:24:7: note: candidate: ‘constexpr symbol::symbol(const symbol&)’ 24 | class symbol { | ^~~~~~ ../src/include/symbol.h:24:7: note: no known conversion for argument 1 from ‘int [257]’ to ‘const symbol&’ ../src/include/symbol.h:24:7: note: candidate: ‘constexpr symbol::symbol(symbol&&)’ ../src/include/symbol.h:24:7: note: no known conversion for argument 1 from ‘int [257]’ to ‘symbol&&’ make: *** [Makefile:8689: src/roff/troff/env.o] Error 1 make: Leaving directory '/home/branden/src/GIT/groff/build' All right, it's our old friend the `symbol` class, which is used all over the place. Suddenly following this thread around the corner reveals the hulking beast known as "change _groff_'s internal character representation to a wide type", which is also a prerequisite for internal Unicode support. So, uh, this one's screeching to a halt for 1.24. I'll land what I can (without changing any classes), declare defeat, and move on to the next formatter change. 
_______________________________________________________ Reply to this item at: <https://savannah.gnu.org/bugs/?66051> _______________________________________________ Message sent via Savannah https://savannah.gnu.org/
signature.asc
Description: PGP signature