https://gcc.gnu.org/g:435346eafadece9d045874ba000cb4a53f625f0c
commit r16-7853-g435346eafadece9d045874ba000cb4a53f625f0c Author: Robert Dubner <[email protected]> Date: Mon Mar 2 15:36:40 2026 -0500 cobol: Improved efficiency of code generated for MOVE "A" TO VAR(1:1). [119456] This PR rightly noted that COBOL source code which obviously could result in simple machine language did not. These changes take advantage of the compiler knowing, at compile time, the values of literal offsets and lengths, and uses that knowledge to generate much more efficient GENERIC for those cases. gcc/cobol/ChangeLog: PR cobol/119456 * genapi.cc (mh_source_is_literalA): Don't set refmod_e attribute unless it is necessary. (have_common_parent): Helper routine that determines whether two COBOL variables are members of the same data description. (mh_alpha_to_alpha): Modified for greater efficiency when table subscripts and reference modification parameters are numeric literals. * genutil.cc (get_data_offset): Recognizes when table subscripts and refmod offsets are numeric literals. (refer_size): Recognizes when refmod offsets are numeric literals. (refer_size_source): Recognizes when table subscripts are numeric literals. Diff: --- gcc/cobol/genapi.cc | 199 ++++++++++++++++++++++++++++++++++++++++++++------- gcc/cobol/genutil.cc | 176 ++++++++++++++++++++++++++++++++++++--------- 2 files changed, 315 insertions(+), 60 deletions(-) diff --git a/gcc/cobol/genapi.cc b/gcc/cobol/genapi.cc index 01ea8cd5e013..4f71f9b1152b 100644 --- a/gcc/cobol/genapi.cc +++ b/gcc/cobol/genapi.cc @@ -15859,13 +15859,6 @@ mh_source_is_literalA(const cbl_refer_t &destref, cbl_encoding_t encoding_dest = destref.field->codeset.encoding; charmap_t *charmap_dest = __gg__get_charmap(encoding_dest); - if( destref.refmod.from - || destref.refmod.len ) - { - // Let the move routine know to treat the destination as alphanumeric - gg_attribute_bit_set(destref.field, refmod_e); - } - static char *buffer = NULL; static size_t buffer_size = 0; size_t source_length; @@ -16001,6 +15994,7 @@ mh_source_is_literalA(const cbl_refer_t &destref, } else { + // The refer has some information in it. gg_memcpy(gg_add(member(destref.field->var_decl_node, "data"), refer_offset(destref)), build_string_literal(dest_bytes, src), @@ -16011,7 +16005,12 @@ mh_source_is_literalA(const cbl_refer_t &destref, else { // This is more complicated than a simple alpha-to-alpha move - + if( destref.refmod.from + || destref.refmod.len ) + { + // Let the move routine know to treat the destination as alphanumeric + gg_attribute_bit_set(destref.field, refmod_e); + } // If the source is flagged ALL, or if we are setting the destination to // a figurative constant, pass along the ALL bit: int rounded_parameter = rounded @@ -16044,19 +16043,82 @@ mh_source_is_literalA(const cbl_refer_t &destref, build_int_cst_type( SIZE_T, outlength), NULL_TREE); } + if( destref.refmod.from + || destref.refmod.len ) + { + // Return that value to its original form + gg_attribute_bit_clear(destref.field, refmod_e); + } } - if( destref.refmod.from - || destref.refmod.len ) - { - // Return that value to its original form - gg_attribute_bit_clear(destref.field, refmod_e); - } moved = true; } return moved; } +static bool +have_common_parent(const cbl_refer_t &destref, + const cbl_refer_t &sourceref) + { + /* We are trying to lay down fast code when possible. But sometimes we have + to go slower in order to be accurate. The COBOL specification explicitly + says that when the storage areas of sending and receiving operands + overlap: + 1) When the data items are not described by the same data description + entry, the result of the statement is undefined. + 2) When the data items are described by the same data description entry, + the result of the statement is the same as if the data items shared + no part of their respective storage areas. + + There is an additional paragraph: + In the case of reference modification, the unique data item produced by + reference modification is not considered to be the same data description + entry as any other data description entry. Therefore, if an overlapping + situation exists, the results of the operation are undefined. + + This routine will return TRUE when neither reference is a refmod, and + both operands ultimately have the same parent (indicating that they are + part of the same data description. + + The point is that when we return True, then the two are not refmods, and + they have a common parent, so we have to use a memmove. When we return + False, then we can use a faster memcpy. + */ + bool retval = true; + if( destref.is_refmod_reference() ) + { + retval = false; + } + else if( sourceref.is_refmod_reference() ) + { + retval = false; + } + else + { + // Neither is a refmod. Check for common parentage: + const cbl_field_t *poppa = destref.field; + const cbl_field_t *momma = sourceref.field; + while( parent_of(poppa) ) + { + // Follow the first family_tree up as far as we can. + poppa = parent_of(poppa); + } + while( parent_of(momma) ) + { + // Follow the second family_tree up as far as we can. + momma = parent_of(momma); + } + if( poppa != momma ) + { + /* Okay, so the analogy breaks down. Think of momma and poppa as + bacteria, or something. */ + retval = false; + } + } + + return retval; + } + static bool mh_alpha_to_alpha(const cbl_refer_t &destref, const cbl_refer_t &sourceref, @@ -16070,8 +16132,6 @@ mh_alpha_to_alpha(const cbl_refer_t &destref, && destref.field->type == FldAlphanumeric && !size_error && sourceref.field->codeset.encoding == destref.field->codeset.encoding - && !destref.refmod.from - && !destref.refmod.len && !(destref.field->attr & rjust_e) && !(sourceref.field->attr & any_length_e) && !(destref.field->attr & any_length_e) @@ -16079,6 +16139,9 @@ mh_alpha_to_alpha(const cbl_refer_t &destref, && !sourceref.all ) { + void (*mover)(tree, tree, tree); // dest, source, count + mover = have_common_parent(destref, sourceref) ? gg_memmove : gg_memcpy; + // We are in a position to simply move bytes from the source to the dest. if( refer_is_clean(sourceref) && refer_is_clean(destref) ) { @@ -16086,7 +16149,7 @@ mh_alpha_to_alpha(const cbl_refer_t &destref, if( destref.field->data.capacity() <= sourceref.field->data.capacity() ) { // This is the simplest case of all - gg_memcpy(member( destref.field->var_decl_node, "data"), + mover(member( destref.field->var_decl_node, "data"), member(sourceref.field->var_decl_node, "data"), build_int_cst_type(SIZE_T, destref.field->data.capacity())); moved = true; @@ -16095,7 +16158,7 @@ mh_alpha_to_alpha(const cbl_refer_t &destref, { // This is a tad more complicated. The source is too short, so we need // to copy over what we can... - gg_memcpy(member( destref.field->var_decl_node, "data"), + mover(member( destref.field->var_decl_node, "data"), member(sourceref.field->var_decl_node, "data"), build_int_cst_type(SIZE_T, sourceref.field->data.capacity())); // And then space-fill the rest: @@ -16109,7 +16172,7 @@ mh_alpha_to_alpha(const cbl_refer_t &destref, charmap->mapped_character(ascii_space), fill_bytes); // ...and then copy those spaces into place. - gg_memcpy( + mover( gg_add(member(destref.field->var_decl_node, "data"), build_int_cst_type(SIZE_T, sourceref.field->data.capacity())), build_string_literal(fill_bytes, spaces), @@ -16118,10 +16181,96 @@ mh_alpha_to_alpha(const cbl_refer_t &destref, moved = true; } } - else + + if( !refer_is_clean(sourceref) && refer_is_clean(destref) ) + { + // The source is dirty, but the destination is clean: + tree source_data; + tree source_len; + + tree dest_data; + tree dest_len; + + source_data = gg_add(member(sourceref.field->var_decl_node, "data"), + refer_offset(sourceref)); + source_len = refer_size_source(sourceref); + + dest_data = member(destref.field->var_decl_node, "data"); + + dest_len = build_int_cst_type(SIZE_T, destref.field->data.capacity()); + IF( source_len, ge_op, dest_len ) + { + // The source has enough (or more) bytes to fill the destination: + mover(dest_data, source_data, dest_len); + } + ELSE + { + // The source data is too short. We need to copy over what we have... + mover(dest_data, source_data, source_len); + + // And then right-fill the remainder with spaces. Create a buffer with + // more than enough spaces for our purposes: + size_t fill_bytes = destref.field->data.capacity(); + char *spaces = static_cast<char *>(xmalloc(fill_bytes)); + charmap_t *charmap =__gg__get_charmap(destref.field->codeset.encoding); + charmap->memset(spaces, + charmap->mapped_character(ascii_space), + fill_bytes); + // And then copy enough of those spaces into place. + mover(gg_add(dest_data, source_len), + build_string_literal(fill_bytes, spaces), + gg_subtract(dest_len, source_len)); + free(spaces); + } + ENDIF + moved = true; + } + if( refer_is_clean(sourceref) && !refer_is_clean(destref) ) + { + // The source is clean but the destination is dirty: + tree source_data; + tree source_len; + + tree dest_data; + tree dest_len ; + + source_data = member(sourceref.field->var_decl_node, "data"); + source_len = build_int_cst_type(SIZE_T, + sourceref.field->data.capacity()); + dest_data = gg_add(member(destref.field->var_decl_node, "data"), + refer_offset(destref)); + dest_len = refer_size_dest(destref); + IF( source_len, ge_op, dest_len ) + { + // The source has enough (or more) bytes to fill the destination: + mover(dest_data, source_data, dest_len); + } + ELSE + { + // The source data is too short. We need to copy over what we have... + mover(dest_data, source_data, source_len); + + // And then right-fill the remainder with spaces. Create a buffer with + // more than enough spaces for our purposes: + size_t fill_bytes = destref.field->data.capacity(); + char *spaces = static_cast<char *>(xmalloc(fill_bytes)); + charmap_t *charmap =__gg__get_charmap(destref.field->codeset.encoding); + charmap->memset(spaces, + charmap->mapped_character(ascii_space), + fill_bytes); + // And then copy enough of those spaces into place. + mover(gg_add(dest_data, source_len), + build_string_literal(fill_bytes, spaces), + gg_subtract(dest_len, source_len)); + free(spaces); + } + ENDIF + + moved = true; + } + if( !refer_is_clean(sourceref) && !refer_is_clean(destref) ) { - // Either the source or the dest is a table or refmod, so we need to do - // more work. + // Both the source and the dest are "dirty" tree source_data = gg_define_variable(UCHAR_P); tree source_len = gg_define_variable(SIZE_T); @@ -16140,12 +16289,12 @@ mh_alpha_to_alpha(const cbl_refer_t &destref, IF( source_len, ge_op, dest_len ) { // The source has enough (or more) bytes to fill the destination: - gg_memcpy(dest_data, source_data, dest_len); + mover(dest_data, source_data, dest_len); } ELSE { // The source data is too short. We need to copy over what we have... - gg_memcpy(dest_data, source_data, source_len); + mover(dest_data, source_data, source_len); // And then right-fill the remainder with spaces. Create a buffer with // more than enough spaces for our purposes: @@ -16156,7 +16305,7 @@ mh_alpha_to_alpha(const cbl_refer_t &destref, charmap->mapped_character(ascii_space), fill_bytes); // And then copy enough of those spaces into place. - gg_memcpy(gg_add(dest_data, source_len), + mover(gg_add(dest_data, source_len), build_string_literal(fill_bytes, spaces), gg_subtract(dest_len, source_len)); free(spaces); diff --git a/gcc/cobol/genutil.cc b/gcc/cobol/genutil.cc index 25f7b8070fa0..4f2f43809093 100644 --- a/gcc/cobol/genutil.cc +++ b/gcc/cobol/genutil.cc @@ -570,10 +570,99 @@ tree get_data_offset(const cbl_refer_t &refer, int *pflags = NULL) { - Analyze(); // This routine returns a tree which is the size_t offset to the data in the // refer/field + /* Let's first attempt to handle commonly-occurring situations that can + be handled efficiently. */ + + const cbl_enabled_exceptions_t &enabled_exceptions(cdf_enabled_exceptions()); + if( !enabled_exceptions.match(ec_bound_subscript_e) + && !enabled_exceptions.match(ec_bound_odo_e) + && !enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + // There is no subscript bounds checking + bool all_literals = true; + for( size_t i=0; i<refer.nsubscript(); i++ ) + { + if( (refer.subscripts[i].field->attr & FIGCONST_MASK) == zero_value_e ) + { + // This refer is a figconst ZERO; we treat it as an ALL ZERO + // This is our internal representation for ALL, as in TABLE(ALL) + all_literals = false; + break; + } + if( !is_literal(refer.subscripts[i].field) ) + { + // A subscript is not a literal. Too bad. + all_literals = false; + break; + } + } + if( refer.refmod.from && !is_literal(refer.refmod.from->field) ) + { + all_literals = false; + } + if( all_literals ) + { + // We are dealing with foo(x)(y:z) where x and y are integer constants. + size_t offset = 0; + + if( refer.nsubscript() ) + { + // We have at least one subscript: + + // Figure we have three subscripts, so nsubscript is 3 + // Figure that the subscripts are {5, 4, 3} + + // We expect that starting from refer.field, that three of our ancestors -- + // call them A1, A2, and A3 -- have occurs clauses. + + // We need to start with the rightmost subscript, and work our way up through + // our parents. As we find each parent with an OCCURS, we increment qual_data + // by (subscript-1)*An->data.capacity() + + // Establish the field_t pointer for walking up through our ancestors: + cbl_field_t *parent = refer.field; + + // Note the backwards test, because refer->nsubscript is an unsigned value + for(size_t i=refer.nsubscript()-1; i<refer.nsubscript(); i-- ) + { + // We need to search upward for an ancestor with occurs_max: + while(parent) + { + if( parent->occurs.ntimes() ) + { + break; + } + parent = parent_of(parent); + } + // we might have an error condition at this point: + if( !parent ) + { + cbl_internal_error("Too many subscripts"); + } + // Pick up the integer value of the subscript. + long subscript = atol(refer.subscripts[i].field->data.original()); + + // Subscript is one-based integer + // Make it zero-based: + subscript = subscript - 1; + offset += subscript * parent->data.capacity(); + parent = parent_of(parent); + } + } + + if( refer.refmod.from ) + { + // We know the refmod is a literal + offset += (atol(refer.refmod.from->field->data.original()) - 1) + * refer.field->codeset.stride(); + return build_int_cst_type(SIZE_T, offset); + } + } + } + // Because this is for source / sending variables, checks are made for // OCCURS DEPENDING ON violations (when those exceptions are enabled) @@ -636,8 +725,6 @@ get_data_offset(const cbl_refer_t &refer, } else { - const cbl_enabled_exceptions_t& - enabled_exceptions( cdf_enabled_exceptions() ); if( !enabled_exceptions.match(ec_bound_subscript_e) ) { // With no exception testing, just pick up the value @@ -698,9 +785,6 @@ get_data_offset(const cbl_refer_t &refer, // Although we strictly don't need to look at the ODO value at this // point, we do want it checked for the purposes of ec-bound-odo - const cbl_enabled_exceptions_t& - enabled_exceptions( cdf_enabled_exceptions() ); - if( enabled_exceptions.match(ec_bound_odo_e) ) { if( parent->occurs.depending_on ) @@ -1933,41 +2017,50 @@ tree // size_t refer_size(const cbl_refer_t &refer, refer_type_t refer_type) { Analyze(); - static tree retval = gg_define_variable(SIZE_T, "..rs_retval", vs_file_static); - - if( !refer.field ) + if( refer.refmod.len && refer.refmod.len->field->type == FldLiteralN ) { - return size_t_zero_node; + return build_int_cst_type(SIZE_T, + atol( refer.refmod.len->field->data.original()) + * refer.field->codeset.stride()); } - - if( refer_is_clean(refer) ) + else { - return get_any_capacity(refer.field); - } + static tree retval = gg_define_variable(SIZE_T, "..rs_retval", vs_file_static); - // Step the first: Get the actual full length: + if( !refer.field ) + { + return size_t_zero_node; + } - if( refer_has_depends(refer, refer_type) ) - { - // Because there is a depends, we might have to change the length: - gg_assign(retval, refer_fill_depends(refer)); - } - else - { - gg_assign(retval, get_any_capacity(refer.field)); - } + if( refer_is_clean(refer) ) + { + return get_any_capacity(refer.field); + } - if( refer.refmod.from || refer.refmod.len ) - { - tree refmod = refer_refmod_length(refer); - // retval is the ODO based total length. - // refmod is the length resulting from refmod(from:len) - // We have to reduce retval by the effect of refmod: - tree diff = gg_subtract(get_any_capacity(refer.field), - refmod); - gg_assign(retval, gg_subtract(retval, diff)); + // Step the first: Get the actual full length: + + if( refer_has_depends(refer, refer_type) ) + { + // Because there is a depends, we might have to change the length: + gg_assign(retval, refer_fill_depends(refer)); + } + else + { + gg_assign(retval, get_any_capacity(refer.field)); + } + + if( refer.refmod.from || refer.refmod.len ) + { + tree refmod = refer_refmod_length(refer); + // retval is the ODO based total length. + // refmod is the length resulting from refmod(from:len) + // We have to reduce retval by the effect of refmod: + tree diff = gg_subtract(get_any_capacity(refer.field), + refmod); + gg_assign(retval, gg_subtract(retval, diff)); + } + return retval; } - return retval; } tree // size_t @@ -1989,7 +2082,6 @@ refer_size_source(const cbl_refer_t &refer) other. But there conceivably might be others,. You have been warned. - */ if( !refer.field ) @@ -2004,6 +2096,20 @@ refer_size_source(const cbl_refer_t &refer) return get_any_capacity(refer.field); } + // We are dealing with a refer + const cbl_enabled_exceptions_t& + enabled_exceptions( cdf_enabled_exceptions() ); + if( !enabled_exceptions.match(ec_bound_ref_mod_e) ) + { + // ref_mod bounds checking is off + if( refer.refmod.len && refer.refmod.len->field->type == FldLiteralN ) + { + // And the refmod.len is a literal. + return build_int_cst_type(SIZE_T, + atol( refer.refmod.len->field->data.original()) + * refer.field->codeset.stride()); } + } + // This assignment has to be here. Simply returning refer_size() results // in regression testing errors. static tree retval = gg_define_variable(SIZE_T, "..rss_retval", vs_file_static);
