https://github.com/Szelethus updated https://github.com/llvm/llvm-project/pull/97407
From 9fed2b7dc5395f487cb91c10eb076bb87e05e9b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krist=C3=B3f=20Umann?= <dkszelet...@gmail.com> Date: Tue, 2 Jul 2024 12:58:19 +0200 Subject: [PATCH 1/4] [analyzer][NFC] Add some docs for LazyCompoundValue Yes, I basically copy-pasted some posts from discord and Artem's book, but these make for a rather decent docs. --- .../StaticAnalyzer/Core/PathSensitive/SVals.h | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h index 3a4b087257149..e44cda50ef21d 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h @@ -326,6 +326,12 @@ class LocAsInteger : public NonLoc { static bool classof(SVal V) { return V.getKind() == LocAsIntegerKind; } }; +/// The simplest example of a concrete compound value is nonloc::CompoundVal, +/// which represents a concrete r-value of an initializer-list or a string. +/// Internally, it contains an llvm::ImmutableList of SVal's stored inside the +/// literal. +/// +/// Source: https://github.com/haoNoQ/clang-analyzer-guide, v0.1, section 5.3.2. class CompoundVal : public NonLoc { friend class ento::SValBuilder; @@ -346,6 +352,39 @@ class CompoundVal : public NonLoc { static bool classof(SVal V) { return V.getKind() == CompoundValKind; } }; +/// The simplest example of a concrete compound value is nonloc::CompoundVal, +/// which represents a concrete r-value of an initializer-list or a string. +/// Internally, it contains an llvm::ImmutableList of SVal's stored inside the +/// literal. +/// +/// However, there is another compound value used in the analyzer, which appears +/// much more often during analysis, which is nonloc::LazyCompoundVal. This +/// value is an r-value that represents a snapshot of any structure "as a whole" +/// at a given moment during the analysis. Such value is already quite far from +/// being re- ferred to as "concrete", as many fields inside it would be unknown +/// or symbolic. nonloc::LazyCompoundVal operates by storing two things: +/// * a reference to the TypedValueRegion being snapshotted (yes, it is always +/// typed), and also +/// * a copy of the whole Store object, obtained from the ProgramState in +/// which it was created. +/// +/// Essentially, nonloc::LazyCompoundVal is a performance optimization for the +/// analyzer. Because Store is immutable, creating a nonloc::LazyCompoundVal is +/// a very cheap operation. Note that the Store contains all region bindings in +/// the program state, not only related to the region. Later, if necessary, such +/// value can be unpacked -- eg. when it is assigned to another variable. +/// +/// Source: https://github.com/haoNoQ/clang-analyzer-guide, v0.1, section 5.3.2. +/// +/// If you ever need to see if a LazyCompoundVal is fully or partially +/// (un)initialized, you can iterBindings(). This is non-typechecked lookup +/// (i.e., you cannot figure out which specific sub-region is initialized by the +/// value you look at, you only get a byte offset). You can also improve +/// iterBindings() to make it possible to restrict the iteration to a single +/// cluster, because within the LazyCompoundVal’s Store only the cluster that +/// corresponds to the LazyCompoundVal’s parent region is relevant. +/// +/// Source: https://discourse.llvm.org/t/analyzer-for-the-undefined-value-of-array-element-the-tracking-information-is-incomplete/49372/2 class LazyCompoundVal : public NonLoc { friend class ento::SValBuilder; From 1980cea32848f889d63cc61444d63a00506b52f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krist=C3=B3f=20Umann?= <dkszelet...@gmail.com> Date: Tue, 16 Jul 2024 14:39:12 +0200 Subject: [PATCH 2/4] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Artem Dergachev <noqnoq...@gmail.com> Co-authored-by: Donát Nagy <donat.n...@ericsson.com> Co-authored-by: Balazs Benics <benicsbal...@gmail.com> --- .../StaticAnalyzer/Core/PathSensitive/SVals.h | 44 +++++++++++-------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h index e44cda50ef21d..0ddc272de2891 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h @@ -352,21 +352,21 @@ class CompoundVal : public NonLoc { static bool classof(SVal V) { return V.getKind() == CompoundValKind; } }; -/// The simplest example of a concrete compound value is nonloc::CompoundVal, -/// which represents a concrete r-value of an initializer-list or a string. -/// Internally, it contains an llvm::ImmutableList of SVal's stored inside the -/// literal. -/// -/// However, there is another compound value used in the analyzer, which appears -/// much more often during analysis, which is nonloc::LazyCompoundVal. This +/// While nonloc::CompoundVal covers a few simple use cases, the nonloc::LazyCompoundVal +/// is a more performant and flexible way to represent an rvalue of record type, +/// so it shows up much more frequently during analysis. This /// value is an r-value that represents a snapshot of any structure "as a whole" /// at a given moment during the analysis. Such value is already quite far from -/// being re- ferred to as "concrete", as many fields inside it would be unknown +/// being referred to as "concrete", as many fields inside it would be unknown /// or symbolic. nonloc::LazyCompoundVal operates by storing two things: /// * a reference to the TypedValueRegion being snapshotted (yes, it is always /// typed), and also -/// * a copy of the whole Store object, obtained from the ProgramState in -/// which it was created. +/// * a reference to the whole Store object, obtained from the ProgramState in +/// which the nonloc::LazyCompoundVal was created. +/// +/// Note that the old ProgramState and its Store is kept alive during the +/// analysis because these are immutable functional data structures and each new +/// Store value is represented as "earlier Store" + "additional binding". /// /// Essentially, nonloc::LazyCompoundVal is a performance optimization for the /// analyzer. Because Store is immutable, creating a nonloc::LazyCompoundVal is @@ -374,15 +374,21 @@ class CompoundVal : public NonLoc { /// the program state, not only related to the region. Later, if necessary, such /// value can be unpacked -- eg. when it is assigned to another variable. /// -/// Source: https://github.com/haoNoQ/clang-analyzer-guide, v0.1, section 5.3.2. -/// -/// If you ever need to see if a LazyCompoundVal is fully or partially -/// (un)initialized, you can iterBindings(). This is non-typechecked lookup -/// (i.e., you cannot figure out which specific sub-region is initialized by the -/// value you look at, you only get a byte offset). You can also improve -/// iterBindings() to make it possible to restrict the iteration to a single -/// cluster, because within the LazyCompoundVal’s Store only the cluster that -/// corresponds to the LazyCompoundVal’s parent region is relevant. +/// If you ever need inspect the contents of the LazyCompoundVal, you can use +/// StoreManager::iterBindings(). It'll iterate through all values in the Store, +/// but you're only interested in the ones that belong to LazyCompoundVal::getRegion(); +/// other bindings are immaterial. +/// +/// NOTE: LazyCompoundVal::getRegion() itself is also immaterial. It is only an +/// implementation detail. LazyCompoundVal represents only the rvalue, the data (known or unknown) +/// that *was* stored in that region *at some point in the past*. The region should not be used for +/// any purpose other than figuring out what part of the frozen Store you're interested in. +/// The value does not represent the *current* value of that region. Sometimes it may, but +/// this should not be relied upon. Instead, if you want to figure out what region it represents, +/// you typically need to see where you got it from in the first place. +/// The region is absolutely not analogous to the C++ "this" pointer. +/// It is also not a valid way to "materialize" the prvalue into a glvalue in C++, because the region +/// represents the *old* storage (sometimes very old), not the *future* storage. /// /// Source: https://discourse.llvm.org/t/analyzer-for-the-undefined-value-of-array-element-the-tracking-information-is-incomplete/49372/2 class LazyCompoundVal : public NonLoc { From 23b79f32ae6e98d8f45702f7d7c712a4e6c418f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krist=C3=B3f=20Umann?= <dkszelet...@gmail.com> Date: Tue, 16 Jul 2024 14:45:20 +0200 Subject: [PATCH 3/4] Remove sources, format comments --- .../StaticAnalyzer/Core/PathSensitive/SVals.h | 45 +++++++++---------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h index 0ddc272de2891..172f1c01c2cf6 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h @@ -330,8 +330,6 @@ class LocAsInteger : public NonLoc { /// which represents a concrete r-value of an initializer-list or a string. /// Internally, it contains an llvm::ImmutableList of SVal's stored inside the /// literal. -/// -/// Source: https://github.com/haoNoQ/clang-analyzer-guide, v0.1, section 5.3.2. class CompoundVal : public NonLoc { friend class ento::SValBuilder; @@ -352,13 +350,14 @@ class CompoundVal : public NonLoc { static bool classof(SVal V) { return V.getKind() == CompoundValKind; } }; -/// While nonloc::CompoundVal covers a few simple use cases, the nonloc::LazyCompoundVal -/// is a more performant and flexible way to represent an rvalue of record type, -/// so it shows up much more frequently during analysis. This -/// value is an r-value that represents a snapshot of any structure "as a whole" -/// at a given moment during the analysis. Such value is already quite far from -/// being referred to as "concrete", as many fields inside it would be unknown -/// or symbolic. nonloc::LazyCompoundVal operates by storing two things: +/// While nonloc::CompoundVal covers a few simple use cases, +/// nonloc::LazyCompoundVal is a more performant and flexible way to represent +/// an rvalue of record type, so it shows up much more frequently during +/// analysis. This value is an r-value that represents a snapshot of any +/// structure "as a whole" at a given moment during the analysis. Such value is +/// already quite far from being referred to as "concrete", as many fields +/// inside it would be unknown or symbolic. nonloc::LazyCompoundVal operates by +/// storing two things: /// * a reference to the TypedValueRegion being snapshotted (yes, it is always /// typed), and also /// * a reference to the whole Store object, obtained from the ProgramState in @@ -376,21 +375,21 @@ class CompoundVal : public NonLoc { /// /// If you ever need inspect the contents of the LazyCompoundVal, you can use /// StoreManager::iterBindings(). It'll iterate through all values in the Store, -/// but you're only interested in the ones that belong to LazyCompoundVal::getRegion(); -/// other bindings are immaterial. -/// -/// NOTE: LazyCompoundVal::getRegion() itself is also immaterial. It is only an -/// implementation detail. LazyCompoundVal represents only the rvalue, the data (known or unknown) -/// that *was* stored in that region *at some point in the past*. The region should not be used for -/// any purpose other than figuring out what part of the frozen Store you're interested in. -/// The value does not represent the *current* value of that region. Sometimes it may, but -/// this should not be relied upon. Instead, if you want to figure out what region it represents, -/// you typically need to see where you got it from in the first place. -/// The region is absolutely not analogous to the C++ "this" pointer. -/// It is also not a valid way to "materialize" the prvalue into a glvalue in C++, because the region -/// represents the *old* storage (sometimes very old), not the *future* storage. +/// but you're only interested in the ones that belong to +/// LazyCompoundVal::getRegion(); other bindings are immaterial. /// -/// Source: https://discourse.llvm.org/t/analyzer-for-the-undefined-value-of-array-element-the-tracking-information-is-incomplete/49372/2 +/// NOTE: LazyCompoundVal::getRegion() itself is also immaterial. It is only an +/// implementation detail. LazyCompoundVal represents only the rvalue, the data +/// (known or unknown) that *was* stored in that region *at some point in the +/// past*. The region should not be used for any purpose other than figuring out +/// what part of the frozen Store you're interested in. The value does not +/// represent the *current* value of that region. Sometimes it may, but this +/// should not be relied upon. Instead, if you want to figure out what region it +/// represents, you typically need to see where you got it from in the first +/// place. The region is absolutely not analogous to the C++ "this" pointer. It +/// is also not a valid way to "materialize" the prvalue into a glvalue in C++, +/// because the region represents the *old* storage (sometimes very old), not +/// the *future* storage. class LazyCompoundVal : public NonLoc { friend class ento::SValBuilder; From ac2cf97fc63a4f6db2cfba5b38e8daeeb3bd5c43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krist=C3=B3f=20Umann?= <dkszelet...@gmail.com> Date: Tue, 16 Jul 2024 14:48:51 +0200 Subject: [PATCH 4/4] Move LazyCompoundVal::getRegion() note to the actual function --- .../StaticAnalyzer/Core/PathSensitive/SVals.h | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h index 172f1c01c2cf6..0aeb4a2e4ca81 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h @@ -378,18 +378,8 @@ class CompoundVal : public NonLoc { /// but you're only interested in the ones that belong to /// LazyCompoundVal::getRegion(); other bindings are immaterial. /// -/// NOTE: LazyCompoundVal::getRegion() itself is also immaterial. It is only an -/// implementation detail. LazyCompoundVal represents only the rvalue, the data -/// (known or unknown) that *was* stored in that region *at some point in the -/// past*. The region should not be used for any purpose other than figuring out -/// what part of the frozen Store you're interested in. The value does not -/// represent the *current* value of that region. Sometimes it may, but this -/// should not be relied upon. Instead, if you want to figure out what region it -/// represents, you typically need to see where you got it from in the first -/// place. The region is absolutely not analogous to the C++ "this" pointer. It -/// is also not a valid way to "materialize" the prvalue into a glvalue in C++, -/// because the region represents the *old* storage (sometimes very old), not -/// the *future* storage. +/// NOTE: LazyCompoundVal::getRegion() itself is also immaterial (see the actual +/// method docs for details). class LazyCompoundVal : public NonLoc { friend class ento::SValBuilder; @@ -399,6 +389,18 @@ class LazyCompoundVal : public NonLoc { } public: + /// This function itself is immaterial. It is only an implementation detail. + /// LazyCompoundVal represents only the rvalue, the data (known or unknown) + /// that *was* stored in that region *at some point in the past*. The region + /// should not be used for any purpose other than figuring out what part of + /// the frozen Store you're interested in. The value does not represent the + /// *current* value of that region. Sometimes it may, but this should not be + /// relied upon. Instead, if you want to figure out what region it represents, + /// you typically need to see where you got it from in the first place. The + /// region is absolutely not analogous to the C++ "this" pointer. It is also + /// not a valid way to "materialize" the prvalue into a glvalue in C++, + /// because the region represents the *old* storage (sometimes very old), not + /// the *future* storage. LLVM_ATTRIBUTE_RETURNS_NONNULL const LazyCompoundValData *getCVData() const { return castDataAs<LazyCompoundValData>(); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits