commit:     b846c59c1e2ad80163745de024154cbe845fedaa
Author:     Oskari Pirhonen <xxc3ncoredxx <AT> gmail <DOT> com>
AuthorDate: Mon Feb 27 02:05:39 2023 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Mon Feb 27 04:43:48 2023 +0000
URL:        https://gitweb.gentoo.org/proj/portage.git/commit/?id=b846c59c

90config-impl-decl: bug fixes

- Match "-Werror=implicit-function-declaration" from gcc
- Use separate RE to check for UTF-8 and ASCII quoting when extracting
  the function name

Signed-off-by: Oskari Pirhonen <xxc3ncoredxx <AT> gmail.com>
Signed-off-by: Sam James <sam <AT> gentoo.org>

 bin/install-qa-check.d/90config-impl-decl | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/bin/install-qa-check.d/90config-impl-decl b/bin/install-qa-check.d/90config-impl-decl
index 2fb8307ea..d1bc0e067 100644
--- a/bin/install-qa-check.d/90config-impl-decl
+++ b/bin/install-qa-check.d/90config-impl-decl
@@ -38,6 +38,12 @@ find_log_targets() {
                find -files0-from - -type f \( "${find_args[@]}" \) -print0
 }
 
+has_utf8_ctype() {
+       # Use python to check if the locale is UTF-8 since tools like locale(1) 
may
+       # not exist (eg, musl systems).
+       #
+       # The interpreter is ${PORTAGE_PYTHON}, or /usr/bin/python when that
+       # variable is unset or empty. It prints locale.getlocale()[1], and the
+       # function's exit status is that of the [[ ]] test below: 0 only when
+       # the printed value is exactly "UTF-8", non-zero otherwise (including
+       # when the interpreter prints nothing).
+       [[ "$("${PORTAGE_PYTHON:-/usr/bin/python}" -c 'import locale; 
print(locale.getlocale()[1])')" == UTF-8 ]]
+}
+
 config_impl_decl_check() {
        local files=()
        local lines=()
@@ -46,19 +52,32 @@ config_impl_decl_check() {
        local entry
        local line
        local func
-       local re=" function '([[:print:]]+)'"
+       local re_uni
+       local re_asc
+       local is_utf8
+
+       # Given the UTF-8 character type, both gcc and clang may enclose the
+       # function name between the LEFT SINGLE QUOTATION MARK and RIGHT SINGLE
+       # QUOTATION MARK codepoints.
+       re_uni=$' function \u2018([^\u2019]+)\u2019'
+
+       # This variant matches ASCII single quotes.
+       re_asc=$' function \x27([^\x27]+)\x27'
+
+       # Is UTF-8 the effective character type?
+       has_utf8_ctype; is_utf8=$(( $? == 0 ))
 
        # Iterate over every log file found and check for 
'-Wimplicit-function-declaration'
        while IFS= read -rd '' l; do
                while IFS= read -ru3 entry; do
                        # Strip ANSI codes (color and erase in line have been 
seen at least)
-                       entry="$(printf '%s\n' "${entry}" | sed -E -e 
$'s/\033\[[0-9;]*[A-Za-z]//g')"
+                       entry="$(printf '%s\n' "${entry}" | LC_ALL='C' sed -E 
-e $'s/\033\[[0-9;]*[A-Za-z]//g')"
 
                        line="${entry%%:*}"
-                       # This conditional should always be true unless 
compiler warnings
-                       # get drastically changed
-                       if [[ ${entry} =~ ${re} ]]; then
+                       if [[ ${is_utf8} -eq 1 && ${entry} =~ ${re_uni} ]] || 
[[ ${entry} =~ ${re_asc} ]]; then
                                func="${BASH_REMATCH[1]}"
+                       else
+                               continue
                        fi
 
                        has "${func}" "${QA_CONFIG_IMPL_DECL_SKIP[@]}" && 
continue
@@ -67,7 +86,7 @@ config_impl_decl_check() {
                        lines+=( "${line}" )
                        funcs+=( "${func}" )
                # Using -I to ignore binary files is a GNU extension for grep
-               done 3< <(grep -nEI -e '-Wimplicit-function-declaration' "${l}")
+               done 3< <(grep -nEI -e 
'-W(error=)?implicit-function-declaration' "${l}")
        done < <(find_log_targets)
 
        # Drop out early if no impl decls found (all the arrays are the same 
size)

Reply via email to