Author: Aaron Ballman
Date: 2025-04-30T10:14:18-04:00
New Revision: e8ae77947154e10dbc05cbb95ec9e10d3b0be13e

URL: 
https://github.com/llvm/llvm-project/commit/e8ae77947154e10dbc05cbb95ec9e10d3b0be13e
DIFF: 
https://github.com/llvm/llvm-project/commit/e8ae77947154e10dbc05cbb95ec9e10d3b0be13e.diff

LOG: [C] Add diagnostic + attr for unterminated strings (#137829)

This introduces three things related to intialization like:

char buf[3] = "foo";

where the array does not declare enough space for the null terminator
but otherwise can represent the array contents exactly.

1) An attribute named 'nonstring' which can be used to mark that a
   field or variable is not intended to hold string data.

2) -Wunterminated-string-initialization, which is grouped under
   -Wextra, and diagnoses the above construct unless the declaration
   uses the 'nonstring' attribute.

3) -Wc++-unterminated-string-initialization, which is grouped under
   -Wc++-compat, and diagnoses the above construct even if the
   declaration uses the 'nonstring' attribute.

Fixes #137705

Added: 
    clang/test/Sema/attr-nonstring.c

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/include/clang/Basic/Attr.td
    clang/include/clang/Basic/AttrDocs.td
    clang/include/clang/Basic/DiagnosticGroups.td
    clang/include/clang/Basic/DiagnosticSemaKinds.td
    clang/lib/Sema/SemaDeclAttr.cpp
    clang/lib/Sema/SemaInit.cpp
    clang/test/Misc/pragma-attribute-supported-attributes-list.test

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index bc68bb8b70b3d..1adb59a970329 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -171,6 +171,23 @@ C Language Changes
   ``-Wenum-conversion`` and ``-Wimplicit-int-enum-cast``. This conversion is an
   int-to-enum conversion because the enumeration on the right-hand side is
   promoted to ``int`` before the assignment.
+- Added ``-Wunterminated-string-initialization``, grouped under ``-Wextra``,
+  which diagnoses an initialization from a string literal where only the null
+  terminator cannot be stored. e.g.,
+
+  .. code-block:: c
+
+
+    char buf1[3] = "foo"; // -Wunterminated-string-initialization
+    char buf2[3] = "flarp"; // -Wexcess-initializers
+
+  This diagnostic can be suppressed by adding the new ``nonstring`` attribute
+  to the field or variable being initialized. #GH137705
+- Added ``-Wc++-unterminated-string-initialization``, grouped under
+  ``-Wc++-compat``, which also diagnoses the same cases as
+  ``-Wunterminated-string-initialization``. However, this diagnostic is not
+  silenced by the ``nonstring`` attribute as these initializations are always
+  incompatible with C++.
 
 C2y Feature Support
 ^^^^^^^^^^^^^^^^^^^

diff  --git a/clang/include/clang/Basic/Attr.td 
b/clang/include/clang/Basic/Attr.td
index dcdcff8c46fe2..3e426bb685924 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -5024,3 +5024,9 @@ def OpenACCRoutineDecl :InheritableAttr {
     void printPrettyPragma(raw_ostream &OS, const PrintingPolicy &Policy) 
const;
   }];
 }
+
+def NonString : InheritableAttr {
+  let Spellings = [GCC<"nonstring">];
+  let Subjects = SubjectList<[Var, Field]>;
+  let Documentation = [NonStringDocs];
+}

diff  --git a/clang/include/clang/Basic/AttrDocs.td 
b/clang/include/clang/Basic/AttrDocs.td
index 6e2dbe5655189..cbb397cb31dfb 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9294,3 +9294,18 @@ Declares that a function potentially allocates heap 
memory, and prevents any pot
 of ``nonallocating`` by the compiler.
   }];
 }
+
+def NonStringDocs : Documentation {
+  let Category = DocCatDecl;
+  let Content = [{
+The ``nonstring`` attribute can be applied to the declaration of a variable or
+a field whose type is a character array to specify that the character array is
+not intended to behave like a null-terminated string. This will silence
+diagnostics with code like:
+
+.. code-block:: c
+
+  char BadStr[3] = "foo"; // No space for the null terminator, diagnosed
+  __attribute__((nonstring)) char NotAStr[3] = "foo"; // Not diagnosed
+  }];
+}

diff  --git a/clang/include/clang/Basic/DiagnosticGroups.td 
b/clang/include/clang/Basic/DiagnosticGroups.td
index fc1ce197ef134..75e8fc541305b 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -156,6 +156,10 @@ def BuiltinRequiresHeader : 
DiagGroup<"builtin-requires-header">;
 def C99Compat : DiagGroup<"c99-compat">;
 def C23Compat : DiagGroup<"c23-compat">;
 def : DiagGroup<"c2x-compat", [C23Compat]>;
+def InitStringTooLongMissingNonString :
+  DiagGroup<"unterminated-string-initialization">;
+def InitStringTooLongForCpp :
+  DiagGroup<"c++-unterminated-string-initialization">;
 def HiddenCppDecl : DiagGroup<"c++-hidden-decl">;
 def DefaultConstInitUnsafe : DiagGroup<"default-const-init-unsafe">;
 def DefaultConstInit : DiagGroup<"default-const-init", 
[DefaultConstInitUnsafe]>;
@@ -163,7 +167,8 @@ def ImplicitVoidPtrCast : 
DiagGroup<"implicit-void-ptr-cast">;
 def ImplicitIntToEnumCast : DiagGroup<"implicit-int-enum-cast",
                                       [ImplicitEnumEnumCast]>;
 def CXXCompat: DiagGroup<"c++-compat", [ImplicitVoidPtrCast, DefaultConstInit,
-                                        ImplicitIntToEnumCast, HiddenCppDecl]>;
+                                        ImplicitIntToEnumCast, HiddenCppDecl,
+                                        InitStringTooLongForCpp]>;
 
 def ExternCCompat : DiagGroup<"extern-c-compat">;
 def KeywordCompat : DiagGroup<"keyword-compat">;
@@ -1143,6 +1148,7 @@ def Extra : DiagGroup<"extra", [
     StringConcatation,
     FUseLdPath,
     CastFunctionTypeMismatch,
+    InitStringTooLongMissingNonString,
   ]>;
 
 def Most : DiagGroup<"most", [

diff  --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index ad5bf26be2590..823d4bb687497 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3804,6 +3804,9 @@ def err_attribute_weakref_without_alias : Error<
   "weakref declaration of %0 must also have an alias attribute">;
 def err_alias_not_supported_on_darwin : Error <
   "aliases are not supported on darwin">;
+def warn_attribute_non_character_array : Warning<
+  "%0%select{ attribute|}1 only applies to fields or variables of character "
+  "array type; type is %2">, InGroup<IgnoredAttributes>;
 def warn_attribute_wrong_decl_type_str : Warning<
   "%0%select{ attribute|}1 only applies to %2">, InGroup<IgnoredAttributes>;
 def err_attribute_wrong_decl_type_str : Error<
@@ -6404,6 +6407,15 @@ def err_initializer_string_for_char_array_too_long : 
Error<
 def ext_initializer_string_for_char_array_too_long : ExtWarn<
   "initializer-string for char array is too long">,
   InGroup<ExcessInitializers>;
+def warn_initializer_string_for_char_array_too_long_no_nonstring : Warning<
+  "initializer-string for character array is too long, array size is %0 but "
+  "initializer has size %1 (including the null terminating character); did you 
"
+  "mean to use the 'nonstring' attribute?">,
+  InGroup<InitStringTooLongMissingNonString>, DefaultIgnore;
+def warn_initializer_string_for_char_array_too_long_for_cpp : Warning<
+  "initializer-string for character array is too long for C++, array size is "
+  "%0 but initializer has size %1 (including the null terminating character)">,
+  InGroup<InitStringTooLongForCpp>, DefaultIgnore;
 def warn_missing_field_initializers : Warning<
   "missing field %0 initializer">,
   InGroup<MissingFieldInitializers>, DefaultIgnore;

diff  --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 8499141c30bce..ab66ae860f86b 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -4907,6 +4907,20 @@ void Sema::AddModeAttr(Decl *D, const 
AttributeCommonInfo &CI,
   D->addAttr(::new (Context) ModeAttr(Context, CI, Name));
 }
 
+static void handleNonStringAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+  // This only applies to fields and variable declarations which have an array
+  // type.
+  QualType QT = cast<ValueDecl>(D)->getType();
+  if (!QT->isArrayType() ||
+      !QT->getBaseElementTypeUnsafe()->isAnyCharacterType()) {
+    S.Diag(D->getBeginLoc(), diag::warn_attribute_non_character_array)
+        << AL << AL.isRegularKeywordAttribute() << QT << AL.getRange();
+    return;
+  }
+
+  D->addAttr(::new (S.Context) NonStringAttr(S.Context, AL));
+}
+
 static void handleNoDebugAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   D->addAttr(::new (S.Context) NoDebugAttr(S.Context, AL));
 }
@@ -7144,6 +7158,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, 
const ParsedAttr &AL,
   case ParsedAttr::AT_Mode:
     handleModeAttr(S, D, AL);
     break;
+  case ParsedAttr::AT_NonString:
+    handleNonStringAttr(S, D, AL);
+    break;
   case ParsedAttr::AT_NonNull:
     if (auto *PVD = dyn_cast<ParmVarDecl>(D))
       handleNonNullAttrParameter(S, PVD, AL);

diff  --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index a85eca4d4ac3c..34766fd87af33 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -210,7 +210,8 @@ static void CheckC23ConstexprInitStringLiteral(const 
StringLiteral *SE,
                                                Sema &SemaRef, QualType &TT);
 
 static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
-                            Sema &S, bool CheckC23ConstexprInit = false) {
+                            Sema &S, const InitializedEntity &Entity,
+                            bool CheckC23ConstexprInit = false) {
   // Get the length of the string as parsed.
   auto *ConstantArrayTy =
       cast<ConstantArrayType>(Str->getType()->getAsArrayTypeUnsafe());
@@ -232,6 +233,7 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, 
const ArrayType *AT,
   }
 
   const ConstantArrayType *CAT = cast<ConstantArrayType>(AT);
+  uint64_t ArrayLen = CAT->getZExtSize();
 
   // We have an array of character type with known size.  However,
   // the size may be smaller or larger than the string we are initializing.
@@ -247,16 +249,31 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, 
const ArrayType *AT,
     }
 
     // [dcl.init.string]p2
-    if (StrLength > CAT->getZExtSize())
+    if (StrLength > ArrayLen)
       S.Diag(Str->getBeginLoc(),
              diag::err_initializer_string_for_char_array_too_long)
-          << CAT->getZExtSize() << StrLength << Str->getSourceRange();
+          << ArrayLen << StrLength << Str->getSourceRange();
   } else {
     // C99 6.7.8p14.
-    if (StrLength - 1 > CAT->getZExtSize())
+    if (StrLength - 1 > ArrayLen)
       S.Diag(Str->getBeginLoc(),
              diag::ext_initializer_string_for_char_array_too_long)
           << Str->getSourceRange();
+    else if (StrLength - 1 == ArrayLen) {
+      // If the entity being initialized has the nonstring attribute, then
+      // silence the "missing nonstring" diagnostic.
+      if (const ValueDecl *D = Entity.getDecl();
+          !D || !D->hasAttr<NonStringAttr>())
+        S.Diag(
+            Str->getBeginLoc(),
+            diag::warn_initializer_string_for_char_array_too_long_no_nonstring)
+            << ArrayLen << StrLength << Str->getSourceRange();
+
+      // Always emit the C++ compatibility diagnostic.
+      S.Diag(Str->getBeginLoc(),
+             diag::warn_initializer_string_for_char_array_too_long_for_cpp)
+          << ArrayLen << StrLength << Str->getSourceRange();
+    }
   }
 
   // Set the type to the actual size that we are initializing.  If we have
@@ -1579,7 +1596,7 @@ void InitListChecker::CheckSubElementType(const 
InitializedEntity &Entity,
     if (IsStringInit(expr, arrayType, SemaRef.Context) == SIF_None) {
       // FIXME: Should we do this checking in verify-only mode?
       if (!VerifyOnly)
-        CheckStringInit(expr, ElemType, arrayType, SemaRef,
+        CheckStringInit(expr, ElemType, arrayType, SemaRef, Entity,
                         SemaRef.getLangOpts().C23 &&
                             initializingConstexprVariable(Entity));
       if (StructuredList)
@@ -2089,9 +2106,9 @@ void InitListChecker::CheckArrayType(const 
InitializedEntity &Entity,
       // constant for each string.
       // FIXME: Should we do these checks in verify-only mode too?
       if (!VerifyOnly)
-        CheckStringInit(IList->getInit(Index), DeclType, arrayType, SemaRef,
-                        SemaRef.getLangOpts().C23 &&
-                            initializingConstexprVariable(Entity));
+        CheckStringInit(
+            IList->getInit(Index), DeclType, arrayType, SemaRef, Entity,
+            SemaRef.getLangOpts().C23 && 
initializingConstexprVariable(Entity));
       if (StructuredList) {
         UpdateStructuredListElement(StructuredList, StructuredIndex,
                                     IList->getInit(Index));
@@ -8405,7 +8422,7 @@ ExprResult InitializationSequence::Perform(Sema &S,
       QualType Ty = Step->Type;
       bool UpdateType = ResultType && 
Entity.getType()->isIncompleteArrayType();
       CheckStringInit(CurInit.get(), UpdateType ? *ResultType : Ty,
-                      S.Context.getAsArrayType(Ty), S,
+                      S.Context.getAsArrayType(Ty), S, Entity,
                       S.getLangOpts().C23 &&
                           initializingConstexprVariable(Entity));
       break;

diff  --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test 
b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index 55f196625770a..7affacb1a109a 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -133,6 +133,7 @@
 // CHECK-NEXT: NoThreadSafetyAnalysis (SubjectMatchRule_function)
 // CHECK-NEXT: NoThrow (SubjectMatchRule_hasType_functionType)
 // CHECK-NEXT: NoUwtable (SubjectMatchRule_hasType_functionType)
+// CHECK-NEXT: NonString (SubjectMatchRule_variable, SubjectMatchRule_field)
 // CHECK-NEXT: NotTailCalled (SubjectMatchRule_function)
 // CHECK-NEXT: OMPAssume (SubjectMatchRule_function, 
SubjectMatchRule_objc_method)
 // CHECK-NEXT: OSConsumed (SubjectMatchRule_variable_is_parameter)

diff  --git a/clang/test/Sema/attr-nonstring.c 
b/clang/test/Sema/attr-nonstring.c
new file mode 100644
index 0000000000000..31728e701a3fc
--- /dev/null
+++ b/clang/test/Sema/attr-nonstring.c
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -fsyntax-only -verify=noncxx,expected,no-nonstring 
-Wunterminated-string-initialization %s
+// RUN: %clang_cc1 -fsyntax-only -verify=noncxx,expected,no-nonstring -Wextra 
%s
+// RUN: %clang_cc1 -fsyntax-only -verify=noncxx,expected,compat 
-Wc++-unterminated-string-initialization %s
+// RUN: %clang_cc1 -fsyntax-only -verify=noncxx,expected,compat -Wc++-compat %s
+// RUN: %clang_cc1 -fsyntax-only -verify=expected,cxx -x c++ %s
+// RUN: %clang_cc1 -fsyntax-only -verify=expected,noncxx 
-Wno-unterminated-string-initialization %s
+// RUN: %clang_cc1 -fsyntax-only -verify=expected,noncxx -Wc++-compat -Wextra 
-Wno-unterminated-string-initialization 
-Wno-c++-unterminated-string-initialization %s
+
+char foo[3] = "foo"; // no-nonstring-warning {{initializer-string for 
character array is too long, array size is 3 but initializer has size 4 
(including the null terminating character); did you mean to use the 'nonstring' 
attribute?}} \
+                        compat-warning {{initializer-string for character 
array is too long for C++, array size is 3 but initializer has size 4 
(including the null terminating character)}} \
+                        cxx-error {{initializer-string for char array is too 
long, array size is 3 but initializer has size 4 (including the null 
terminating character)}}
+__attribute__((nonstring)) char bar[3] = "bar"; // compat-warning 
{{initializer-string for character array is too long for C++, array size is 3 
but initializer has size 4 (including the null terminating character)}} \
+                                                   cxx-error 
{{initializer-string for char array is too long, array size is 3 but 
initializer has size 4 (including the null terminating character)}}
+
+struct S {
+  char buf[3];
+  __attribute__((nonstring)) char fub[3];
+} s = { "baz", "bop" }; // no-nonstring-warning {{initializer-string for 
character array is too long, array size is 3 but initializer has size 4 
(including the null terminating character); did you mean to use the 'nonstring' 
attribute?}} \
+                           compat-warning 2 {{initializer-string for character 
array is too long for C++, array size is 3 but initializer has size 4 
(including the null terminating character)}} \
+                           cxx-error 2 {{initializer-string for char array is 
too long, array size is 3 but initializer has size 4 (including the null 
terminating character)}}
+
+// Show that we also issue the diagnostic for the other character types as 
well.
+signed char scfoo[3] = "foo";    // no-nonstring-warning {{initializer-string 
for character array is too long, array size is 3 but initializer has size 4 
(including the null terminating character); did you mean to use the 'nonstring' 
attribute?}} \
+                                    compat-warning {{initializer-string for 
character array is too long for C++, array size is 3 but initializer has size 4 
(including the null terminating character)}} \
+                                    cxx-error {{initializer-string for char 
array is too long, array size is 3 but initializer has size 4 (including the 
null terminating character)}}
+unsigned char ucfoo[3] = "foo";  // no-nonstring-warning {{initializer-string 
for character array is too long, array size is 3 but initializer has size 4 
(including the null terminating character); did you mean to use the 'nonstring' 
attribute?}} \
+                                    compat-warning {{initializer-string for 
character array is too long for C++, array size is 3 but initializer has size 4 
(including the null terminating character)}} \
+                                    cxx-error {{initializer-string for char 
array is too long, array size is 3 but initializer has size 4 (including the 
null terminating character)}}
+
+// wchar_t (et al) are a bit funny in that it cannot do string init in C++ at
+// all, so they are not tested.
+
+// Show that we properly diagnose incorrect uses of nonstring.
+__attribute__((nonstring)) void func(void);       // expected-warning 
{{'nonstring' attribute only applies to variables and non-static data members}}
+__attribute__((nonstring("test"))) char eek1[2];  // expected-error 
{{'nonstring' attribute takes no arguments}}
+__attribute__((nonstring)) int eek2;              // expected-warning 
{{'nonstring' attribute only applies to fields or variables of character array 
type; type is 'int'}}
+
+// Note, these diagnostics are separate from the "too many initializers"
+// diagnostic when you overwrite more than just the null terminator.
+char too_big[3] = "this is too big"; // noncxx-warning {{initializer-string 
for char array is too long}} \
+                                        cxx-error {{initializer-string for 
char array is too long, array size is 3 but initializer has size 16 (including 
the null terminating character)}}


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to