substring_loc::get_location currently fails for the final terminator character in a STRING_CST from the C frontend, so that format_warning_va falls back to using the location of the string as a whole.
This patch tweaks things [1] so that we use the final closing quote as the location of the terminator character, as requested in PR preprocessor/77672. Successfully bootstrapped&regrtested on x86_64-pc-linux-gnu. Committed to trunk as r240434. [1] specifically, cpp_interpret_string_1. gcc/ChangeLog: PR preprocessor/77672 * input.c (selftest::test_lexer_string_locations_simple): Update test to expect location information of the terminator character at the location of the final closing quote. (selftest::test_lexer_string_locations_hex): Likewise. (selftest::test_lexer_string_locations_oct): Likewise. (selftest::test_lexer_string_locations_letter_escape_1): Likewise. (selftest::test_lexer_string_locations_letter_escape_2): Likewise. (selftest::test_lexer_string_locations_ucn4): Likewise. (selftest::test_lexer_string_locations_ucn8): Likewise. (selftest::test_lexer_string_locations_u8): Likewise. (selftest::test_lexer_string_locations_utf8_source): Likewise. (selftest::test_lexer_string_locations_concatenation_1): Likewise. (selftest::test_lexer_string_locations_concatenation_2): Likewise. (selftest::test_lexer_string_locations_concatenation_3): Likewise. (selftest::test_lexer_string_locations_macro): Likewise. (selftest::test_lexer_string_locations_long_line): Likewise. gcc/testsuite/ChangeLog: PR preprocessor/77672 * gcc.dg/plugin/diagnostic-test-string-literals-1.c (test_terminator_location): New function. libcpp/ChangeLog: PR preprocessor/77672 * charset.c (cpp_interpret_string_1): Add a source_range for the NUL-terminator, using the location of the trailing quote of the final string. 
--- gcc/input.c | 81 +++++++++++++--------- .../plugin/diagnostic-test-string-literals-1.c | 14 ++++ libcpp/charset.c | 9 ++- 3 files changed, 71 insertions(+), 33 deletions(-) diff --git a/gcc/input.c b/gcc/input.c index 55bff48..9b26378 100644 --- a/gcc/input.c +++ b/gcc/input.c @@ -2239,12 +2239,12 @@ test_lexer_string_locations_simple (const line_table_case &case_) free (const_cast <unsigned char *> (dst_string.text)); /* Verify ranges of individual characters. This no longer includes the - quotes. */ - for (int i = 0; i <= 9; i++) + opening quote, but does include the closing quote. */ + for (int i = 0; i <= 10; i++) ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); - ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 10); + ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11); } /* As test_lexer_string_locations_simple, but use an EBCDIC execution @@ -2339,14 +2339,14 @@ test_lexer_string_locations_hex (const line_table_case &case_) free (const_cast <unsigned char *> (dst_string.text)); /* Verify ranges of individual characters. This no longer includes the - quotes. */ + opening quote, but does include the closing quote. */ for (int i = 0; i <= 4; i++) ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18); - for (int i = 6; i <= 9; i++) + for (int i = 6; i <= 10; i++) ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i); - ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 10); + ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11); } /* Lex a string literal containing an octal-escaped character. @@ -2380,14 +2380,14 @@ test_lexer_string_locations_oct (const line_table_case &case_) free (const_cast <unsigned char *> (dst_string.text)); /* Verify ranges of individual characters. This no longer includes the - quotes. */ + opening quote, but does include the closing quote. 
*/ for (int i = 0; i < 5; i++) ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18); - for (int i = 6; i <= 9; i++) + for (int i = 6; i <= 10; i++) ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i); - ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 10); + ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11); } /* Test of string literal containing letter escapes. */ @@ -2420,12 +2420,12 @@ test_lexer_string_locations_letter_escape_1 (const line_table_case &case_) ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, 5, 1, 17, 18); - /* "bar". */ - for (int i = 6; i <= 8; i++) + /* "bar" and closing quote for nul-terminator. */ + for (int i = 6; i <= 9; i++) ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, i, 1, 13 + i, 13 + i); - ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 9); + ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10); } /* Another test of a string literal containing a letter escape. @@ -2455,7 +2455,11 @@ test_lexer_string_locations_letter_escape_2 (const line_table_case &case_) ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, 3, 1, 13, 14); - ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 4); + /* Closing quote for nul-terminator. */ + ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, + 4, 1, 15, 15); + + ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5); } /* Lex a string literal containing UCN 4 characters. @@ -2498,6 +2502,7 @@ test_lexer_string_locations_ucn4 (const line_table_case &case_) 11 0x37 '7' 27 12 0x38 '8' 28 13 0x39 '9' 29 + 14 0x00 30 (closing quote) ----------- ---- ----- ------- ---------------. */ cpp_string dst_string; @@ -2510,7 +2515,7 @@ test_lexer_string_locations_ucn4 (const line_table_case &case_) free (const_cast <unsigned char *> (dst_string.text)); /* Verify ranges of individual characters. This no longer includes the - quotes. 
+ opening quote, but does include the closing quote. '01234'. */ for (int i = 0; i <= 4; i++) ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); @@ -2520,11 +2525,11 @@ test_lexer_string_locations_ucn4 (const line_table_case &case_) /* U+2175. */ for (int i = 8; i <= 10; i++) ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26); - /* '789'. */ - for (int i = 11; i <= 13; i++) + /* '789' and nul terminator */ + for (int i = 11; i <= 14; i++) ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i); - ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 14); + ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15); } /* Lex a string literal containing UCN 8 characters. @@ -2561,7 +2566,7 @@ test_lexer_string_locations_ucn8 (const line_table_case &case_) free (const_cast <unsigned char *> (dst_string.text)); /* Verify ranges of individual characters. This no longer includes the - quotes. + opening quote, but does include the closing quote. '01234'. */ for (int i = 0; i <= 4; i++) ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); @@ -2574,8 +2579,10 @@ test_lexer_string_locations_ucn8 (const line_table_case &case_) /* '789' at columns 35-37 */ for (int i = 11; i <= 13; i++) ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i); + /* Closing quote/nul-terminator at column 38. */ + ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38); - ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 14); + ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15); } /* Fetch a big-endian 32-bit value and convert to host endianness. */ @@ -2751,8 +2758,8 @@ test_lexer_string_locations_u8 (const line_table_case &case_) free (const_cast <unsigned char *> (dst_string.text)); /* Verify ranges of individual characters. This no longer includes the - quotes. */ - for (int i = 0; i <= 9; i++) + opening quote, but does include the closing quote. 
*/ + for (int i = 0; i <= 10; i++) ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); } @@ -2829,13 +2836,15 @@ test_lexer_string_locations_utf8_source (const line_table_case &case_) free (const_cast <unsigned char *> (dst_string.text)); /* Verify ranges of individual characters. This no longer includes the - quotes. + opening quote, but does include the closing quote. Assuming that both source and execution encodings are UTF-8, we have - a run of 25 octets in each. */ + a run of 25 octets in each, plus the NUL terminator. */ for (int i = 0; i < 25; i++) ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); + /* NUL-terminator should use the closing quote at column 35. */ + ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35); - ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 25); + ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26); } /* Test of string literal concatenation. */ @@ -2881,12 +2890,14 @@ test_lexer_string_locations_concatenation_1 (const line_table_case &case_) location_t initial_loc = input_locs[0]; + /* "01234" on line 1. */ for (int i = 0; i <= 4; i++) ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i); - for (int i = 5; i <= 9; i++) + /* "56789" in line 2, plus its closing quote for the nul terminator. */ + for (int i = 5; i <= 10; i++) ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i); - ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 10); + ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11); } /* Another test of string literal concatenation. */ @@ -2958,7 +2969,10 @@ test_lexer_string_locations_concatenation_2 (const line_table_case &case_) ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j, i + 1, 10 + j, 10 + j); - ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 10); + /* NUL-terminator should use the final closing quote at line 5 column 12. 
*/ + ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12); + + ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11); } /* Another test of string literal concatenation, this time combined with @@ -3009,7 +3023,10 @@ test_lexer_string_locations_concatenation_3 (const line_table_case &case_) for (int i = 7; i <= 9; i++) ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i); - ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 10); + /* NUL-terminator should use the location of the final closing quote. */ + ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38); + + ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11); } /* Test of string literal in a macro. */ @@ -3034,11 +3051,11 @@ test_lexer_string_locations_macro (const line_table_case &case_) /* Verify ranges of individual characters. We ought to see columns within the macro definition. */ - for (int i = 0; i <= 9; i++) + for (int i = 0; i <= 10; i++) ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, i, 1, 20 + i, 20 + i); - ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10); + ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11); tok = test.get_token (); ASSERT_EQ (tok->type, CPP_PADDING); @@ -3132,8 +3149,8 @@ test_lexer_string_locations_long_line (const line_table_case &case_) return; /* Verify ranges of individual characters. 
*/ - ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 130); - for (int i = 0; i < 130; i++) + ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131); + for (int i = 0; i < 131; i++) ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, i, 2, 7 + i, 7 + i); } diff --git a/gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c index d5be021..3d5ff6d 100644 --- a/gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c +++ b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c @@ -209,3 +209,17 @@ test_macro (void) ~~~ { dg-end-multiline-output "" } */ } + +/* Verify that the location of the closing quote is used + for the location of the null terminating character. */ + +void +test_terminator_location (void) +{ + __emit_string_literal_range ("0123456789", /* { dg-warning "range" } */ + 10, 10, 10); +/* { dg-begin-multiline-output "" } + __emit_string_literal_range ("0123456789", + ^ + { dg-end-multiline-output "" } */ +} diff --git a/libcpp/charset.c b/libcpp/charset.c index 6a92ade..e77270a 100644 --- a/libcpp/charset.c +++ b/libcpp/charset.c @@ -1538,9 +1538,9 @@ cpp_interpret_string_1 (cpp_reader *pfile, const cpp_string *from, size_t count, tbuf.len = 0; } + cpp_string_location_reader *loc_reader = NULL; for (i = 0; i < count; i++) { - cpp_string_location_reader *loc_reader = NULL; if (loc_readers) loc_reader = &loc_readers[i]; @@ -1652,6 +1652,13 @@ cpp_interpret_string_1 (cpp_reader *pfile, const cpp_string *from, size_t count, to->text = tbuf.text; to->len = tbuf.len; } + /* Use the location of the trailing quote as the location of the + NUL-terminator. */ + if (loc_reader) + { + source_range range = loc_reader->get_next (); + out->add_range (range); + } return true; -- 1.8.5.3