As explained in the libstdc++ manual[1] and elsewhere[2], using tolower
and toupper in std::transform directly is wrong. If char is signed then
non-ASCII characters with negative values lead to undefined behaviour.
Also, tolower and toupper are overloaded names in C++ so expecting them
to resolve to a unique function pointer is unreliable. Finally, the
<cctype> header was included, not <ctype.h>, so they should have been
qualified as std::tolower and std::toupper.

[1] 
https://gcc.gnu.org/onlinedocs/libstdc++/manual/strings.html#strings.string.simple
[2] https://en.cppreference.com/w/cpp/string/byte/tolower

libgcobol/ChangeLog:

        * intrinsic.cc (is_zulu_format): Qualify toupper and cast
        argument to unsigned char.
        (fill_cobol_tm): Likewise.
        (iscasematch): Likewise for to lower.
        (numval): Qualify calls to tolower.
        (__gg__lower_case): Use lambda expression for
        tolower call.
        (__gg__upper_case): Likewise for toupper call.
        * libgcobol.cc (mangler_core): Cast tolower argument to unsigned
        char.
        * valconv.cc (__gg__string_to_numeric_edited): Cast to upper
        arguments to unsigned char.
---

As discussed on IRC, it might not be correct to use the single-argument
forms of tolower and toupper, which depend on the current locale. The
C++ two-argument form, std::toupper(int, const std::locale&), can use an
arbitrary locale so can be fixed to the "C" locale. I don't know if
that's needed. All this patch does is ensure no negative char values can
be passed to them, and ensure they are used portably so can be found by
name lookup.

 libgcobol/intrinsic.cc | 28 +++++++++++++++-------------
 libgcobol/libgcobol.cc |  2 +-
 libgcobol/valconv.cc   |  4 ++--
 3 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/libgcobol/intrinsic.cc b/libgcobol/intrinsic.cc
index e3d255a29d6..345d3ac7352 100644
--- a/libgcobol/intrinsic.cc
+++ b/libgcobol/intrinsic.cc
@@ -99,7 +99,7 @@ is_zulu_format(PCHAR left, PCHAR &right)
   bool retval = false;
   if( right > left )
     {
-    retval = toupper(*(right-1)) == internal_Z;
+    retval = std::toupper((unsigned char)*(right-1)) == internal_Z;
     }
   return retval;
   }
@@ -1984,7 +1984,8 @@ __gg__lower_case( cblc_field_t *dest,
   memset(dest->data, internal_space, dest_length);
   memcpy(dest->data, input->data+input_offset, std::min(dest_length, 
source_length));
   internal_to_ascii((char *)dest->data, dest_length);
-  std::transform(dest->data, dest->data + dest_length, dest->data, tolower);
+  std::transform(dest->data, dest->data + dest_length, dest->data,
+                [](unsigned char c) { return std::tolower(c); });
   ascii_to_internal_str((char *)dest->data, dest_length);
   }
 
@@ -2431,7 +2432,7 @@ numval( cblc_field_t *dest,
           state = SPACE4;
           break;
           }
-        if( tolower(ch) == 'd' )
+        if( std::tolower(ch) == 'd' )
           {
           if( leading_sign )
             {
@@ -2439,7 +2440,7 @@ numval( cblc_field_t *dest,
             }
           ch = *p++;
           errpos += 1;
-          if( p > pend || tolower(ch) != 'b' )
+          if( p > pend || std::tolower(ch) != 'b' )
             {
             goto done;
             }
@@ -2447,7 +2448,7 @@ numval( cblc_field_t *dest,
           state = SPACE4;
           break;
           }
-        if( tolower(ch) == 'c' )
+        if( std::tolower(ch) == 'c' )
           {
           if( leading_sign )
             {
@@ -2455,7 +2456,7 @@ numval( cblc_field_t *dest,
             }
           ch = *p++;
           errpos += 1;
-          if( p > pend || tolower(ch) != 'r' )
+          if( p > pend || std::tolower(ch) != 'r' )
             {
             goto done;
             }
@@ -2494,7 +2495,7 @@ numval( cblc_field_t *dest,
           state = SPACE4;
           break;
           }
-        if( tolower(ch) == 'd' )
+        if( std::tolower(ch) == 'd' )
           {
           if( leading_sign )
             {
@@ -2502,7 +2503,7 @@ numval( cblc_field_t *dest,
             }
           ch = *p++;
           errpos += 1;
-          if( p > pend || tolower(ch) != 'b' )
+          if( p > pend || std::tolower(ch) != 'b' )
             {
             goto done;
             }
@@ -2510,7 +2511,7 @@ numval( cblc_field_t *dest,
           state = SPACE4;
           break;
           }
-        if( tolower(ch) == 'c' )
+        if( std::tolower(ch) == 'c' )
           {
           if( leading_sign )
             {
@@ -2518,7 +2519,7 @@ numval( cblc_field_t *dest,
             }
           ch = *p++;
           errpos += 1;
-          if( p > pend || tolower(ch) != 'r' )
+          if( p > pend || std::tolower(ch) != 'r' )
             {
             goto done;
             }
@@ -3736,7 +3737,8 @@ __gg__upper_case( cblc_field_t *dest,
   memset(dest->data, internal_space, dest_length);
   memcpy(dest->data, input->data+input_offset, std::min(dest_length, 
source_length));
   internal_to_ascii((char *)dest->data, dest_length);
-  std::transform(dest->data, dest->data + dest_length, dest->data, toupper);
+  std::transform(dest->data, dest->data + dest_length, dest->data,
+                [](unsigned char c) { return std::toupper(c); });
   ascii_to_internal_str((char *)dest->data, dest_length);
   }
 
@@ -4551,7 +4553,7 @@ fill_cobol_tm(cobol_tm &ctm,
     if( ch == internal_Z || ch == internal_z )
       {
       // This has to be the end of the road
-      if( toupper(source[0]) != 'Z' )
+      if( std::toupper((unsigned char)source[0]) != 'Z' )
         {
         retval += 0;
         break;
@@ -5054,7 +5056,7 @@ iscasematch(char *a1, char *a2, char *b1, char *b2)
   bool retval = true;
   while( a1 < a2 && b1 < b2 )
     {
-    if( tolower(*a1++) != tolower(*b1++) )
+    if( std::tolower((unsigned char)*a1++) != std::tolower((unsigned 
char)*b1++) )
       {
       retval = false;
       }
diff --git a/libgcobol/libgcobol.cc b/libgcobol/libgcobol.cc
index 0890835822c..b990508b112 100644
--- a/libgcobol/libgcobol.cc
+++ b/libgcobol/libgcobol.cc
@@ -8826,7 +8826,7 @@ mangler_core(const char *s, const char *eos)
       }
     else
       {
-      *d++ = tolower(ch);
+      *d++ = tolower((unsigned char)ch);
       }
     }
   *d++ = NULLCH;
diff --git a/libgcobol/valconv.cc b/libgcobol/valconv.cc
index 6ddc3c00c00..1500d57051e 100644
--- a/libgcobol/valconv.cc
+++ b/libgcobol/valconv.cc
@@ -252,8 +252,8 @@ __gg__string_to_numeric_edited( char * const dest,
   if( dlength >= 2 )
     {
     // It's a positive number, so we might have to get rid of a CR or DB:
-    char ch1 = toupper(dest[dlength-2]);
-    char ch2 = toupper(dest[dlength-1]);
+    char ch1 = toupper((unsigned char)dest[dlength-2]);
+    char ch2 = toupper((unsigned char)dest[dlength-1]);
     if(     (ch1 == ascii_D && ch2 == ascii_B)
             ||  (ch1 == ascii_C && ch2 == ascii_R) )
       {
-- 
2.48.1

Reply via email to