MonetDB: Oct2020 - Initial version of a melcheck.py program to c...

Sjoerd Mullender Thu, 21 Jan 2021 00:36:22 -0800

Changeset: bdafff4d9886 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=bdafff4d9886
Added Files:
        testing/melcheck.py
Modified Files:
        testing/exportutils.py
        testing/listexports.py.in
        testing/malcheck.py
Branch: Oct2020
Log Message:


Initial version of a melcheck.py program to check mel_func array contents.


diffs (truncated from 395 to 300 lines):

diff --git a/testing/exportutils.py b/testing/exportutils.py
--- a/testing/exportutils.py
+++ b/testing/exportutils.py
@@ -5,119 +5,157 @@
 # Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
 
 import re
-
-# a function-like #define that we expand to also find exports hidden
-# in preprocessor macros
-defre = re.compile(r'^[ \t]*#[ \t]*define[ \t]+'            # #define
-                   r'(?P<name>[a-zA-Z_][a-zA-Z0-9_]*)'      # name being 
defined
-                   r'\((?P<args>[a-zA-Z0-9_, \t]*)\)[ \t]*' # arguments
-                   r'(?P<def>.*)$',                         # macro replacement
-                   re.MULTILINE)
-# line starting with a "#"
-cldef = re.compile(r'^[ \t]*#', re.MULTILINE)
+import os
 
-# white space
-spcre = re.compile(r'\s+')
+# macro definition
+macrore = re.compile(r'\s*#\s*define\s+'            # #define
+                     r'(?P<name>\w+)'               # name being defined
+                     r'(?:\((?P<args>[^)]*)\))?\s*' # optional arguments
+                     r'(?P<repl>.*)')               # replacement
+# include file
+inclre = re.compile(r'\s*#\s*include\s+"(?P<file>[^"]*)"')
+# comments (/* ... */ where ... is as short as possible)
+cmtre = re.compile(r'/\*[^*]*(\*(?=[^/])[^*]*)*\*/|//.*')
+# horizontal white space
+spcre = re.compile(r'[ \t]+')
+# identifier
+identre = re.compile(r'\b(?P<ident>[a-zA-Z_]\w*)\b')
+# undef
+undefre = re.compile(r'\s*#\s*undef\s+(?P<name>\w+)')
+ifre = re.compile(r'\s*#\s*if') # #if or #ifdef
+elifre = re.compile(r'\s*#\s*elif')
+elsere = re.compile(r'\s*#\s*else')
+endifre = re.compile(r'\s*#\s*endif')
 
-# some regexps helping to normalize a declaration
-strre = re.compile(r'([^ *])\*')
-comre = re.compile(r',\s*')
-
-# comments (/* ... */ where ... is as short as possible)
-cmtre = re.compile(r'/\*.*?\*/|//[^\n]*', re.DOTALL)
-
-deepnesting = False
+nested = r'' # 
r'(?:\([^()]*(?:\([^()]*(?:\([^()]*(?:\([^()]*(?:\([^()]*(?:\([^()]*\)[^()]*)*\)[^()]*)*\)[^()]*)*\)[^()]*)*\)[^()]*)*\)[^()]*)*'
 
-# do something a bit like the C preprocessor
-#
-# we expand function-like macros and remove all ## sequences from the
-# replacement (even when there are no adjacent parameters that were
-# replaced), but this is good enough for our purpose of finding
-# exports that are hidden away in several levels of macro definitions
-#
-# we assume that there are no continuation lines in the input
-def preprocess(data, printdef=False):
-    # remove C comments
-    res = cmtre.search(data)
+def process(line, funmac, macros, infunmac=False):
+    nline = ''
+    pos = 0
+    res = identre.search(line)
     while res is not None:
-        data = data[:res.start(0)] + ' ' + data[res.end(0):]
-        res = cmtre.search(data, res.start(0))
-    # remove \ <newline> combo's
+        name = res.group('ident')
+        if name in macros:
+            macros2 = macros.copy()
+            del macros2[name]
+            repl = process(macros[name], funmac, macros2)
+            if line[res.start(0)-1:res.start(0)] == '#' and \
+               line[res.start(0)-2:res.start(0)] != '##':
+                nline += line[pos:res.start(0)-1]
+                nline += '"' + repl.replace('\\', r'\\').replace('"', r'\"') + 
'"'
+            else:
+                nline += line[pos:res.start(0)] + repl
+            pos = res.end(0)
+        elif name in funmac:
+            args, repl = funmac[name]
+            pat = r'\s*\('
+            sep = r''
+            for arg in args:
+                pat += sep + r'([^,()]*(?:\([^()]*'+nested+r'\)[^,()]*)*)'
+                sep = r','
+            pat += r'\s*\)'
+            r = re.compile(pat)
+            res2 = r.match(line, pos=res.end(0))
+            if res2 is not None:
+                macros2 = {}
+                i = 1
+                for arg in args:
+                    macros2[arg] = res2.group(i).strip()
+                    i += 1
+                repl = process(repl, {}, macros2, True)
+                funmac2 = funmac.copy()
+                del funmac2[name]
+                repl = process(repl, funmac2, macros)
+                repl = repl.replace('##', '')
+                nline += line[pos:res.start(0)] + repl
+                pos = res2.end(0)
+            else:
+                nline += line[pos:res.end(0)]
+                pos = res.end(0)
+        else:
+            nline += line[pos:res.end(0)]
+            pos = res.end(0)
+        res = identre.search(line, pos=pos)
+    nline += line[pos:]
+    return nline
+
+def readfile(f, funmac=None, macros=None, files=None, printdef=False):
+    data = open(f).read()
+    dirname, f = os.path.split(f)
+    data = cmtre.sub(' ', data)
     data = data.replace('\\\n', '')
-
-    defines = {}
+    data = spcre.sub(' ', data)
+    data = data.splitlines()
+    if funmac is None:
+        funmac = {}
+    if macros is None:
+        macros = {}
+    if files is None:
+        files = set()
+    files.add(f)
     ndata = []
-    for line in data.split('\n'):
-        res = defre.match(line)
-        if res is not None:
-            name, args, body = res.groups()
-            args = tuple([x.strip() for x in args.split(',')])
-            if len(args) == 1 and args[0] == '':
-                args = ()       # empty argument list
-            if name not in ('__attribute__', '__format__', '__alloc_size__') 
and \
-               (name not in defines or not defines[name][1].strip()):
-                defines[name] = (args, body)
+    skip = []
+    for line in data:
+        if endifre.match(line) is not None:
             if printdef:
                 ndata.append(line)
-        else:
-            changed = True
-            while changed:
-                line, changed = replace(line, defines, [])
-            if printdef or not cldef.match(line):
+            if skip:
+                del skip[-1]
+            continue
+        if ifre.match(line) is not None:
+            if printdef:
                 ndata.append(line)
-    return '\n'.join(ndata)
-
-def replace(line, defines, tried):
-    changed = False
-    # match argument to macro with optionally several levels
-    # of parentheses
-    if deepnesting:     # optionally deeply nested parentheses
-        nested = 
r'(?:\([^()]*(?:\([^()]*(?:\([^()]*(?:\([^()]*(?:\([^()]*(?:\([^()]*\)[^()]*)*\)[^()]*)*\)[^()]*)*\)[^()]*)*\)[^()]*)*\)[^()]*)*'
-    else:
-        nested = ''
-    for name, (args, body) in defines.items():
-        if name in tried:
+            skip.append(False)
+            continue
+        if elifre.match(line) or elsere.match(line):
+            if printdef:
+                ndata.append(line)
+            if skip:
+                skip[-1] = True
+            continue
+        if skip and skip[-1]:
+            if printdef:
+                ndata.append(line)
             continue
-        args, body = defines[name]
-        pat = r'\b%s\b\(' % name
-        sep = r''
-        for arg in args:
-            pat = pat + sep + r'([^,()]*(?:\([^()]*' + nested + r'\)[^,()]*)*)'
-            sep = ','
-        pat += r'\)'
-        repl = {}
-        r = re.compile(pat)
-        res = r.search(line)
-        while res is not None:
-            bd = body
-            changed = True
-            if len(args) > 0:
-                pars = [x.strip() for x in res.groups()]
-                pat = r'\b(?:'
-                sep = ''
-                for arg, par in zip(args, pars):
-                    repl[arg] = par
-                    pat += sep + arg
-                    sep = '|'
-                pat += r')\b'
-                r2 = re.compile(pat)
-                res2 = r2.search(bd)
-                while res2 is not None:
-                    arg = res2.group(0)
-                    if bd[res2.start(0)-1:res2.start(0)] == '#' and \
-                       bd[res2.start(0)-2:res2.start(0)] != '##':
-                        # replace #ARG by stringified replacement
-                        pos = res2.start(0) + len(repl[arg]) + 1
-                        bd = bd[:res2.start(0)-1] + '"' + repl[arg] + '"' + 
bd[res2.end(0):]
-                    else:
-                        pos = res2.start(0) + len(repl[arg])
-                        bd = bd[:res2.start(0)] + repl[arg] + bd[res2.end(0):]
-                    res2 = r2.search(bd, pos)
-            bd = bd.replace('##', '')
-            bd, changed = replace(bd, defines, tried + [name])
-            line = line[:res.start(0)] + bd + line[res.end(0):]
-            res = r.search(line, res.start(0) + len(bd))
-    return line, changed
+        res = macrore.match(line)
+        if res is not None:
+            if printdef:
+                ndata.append(line)
+            name = res.group('name')
+            args = res.group('args')
+            repl = res.group('repl')
+            if args:
+                args = tuple([x.strip() for x in args.split(',')])
+                if len(args) == 1 and args[0] == '':
+                    args = ()   # empty argument list
+                funmac[name] = (args, repl)
+                continue
+            macros[name] = repl
+            continue
+        res = inclre.match(line)
+        if res is not None:
+            fn = res.group('file')
+            if '/' not in fn and os.path.exists(os.path.join(dirname, fn)) and 
fn not in files:
+                incdata = readfile(os.path.join(dirname, fn), funmac, macros, 
files, printdef)
+                ndata.extend(incdata)
+                continue
+            ndata.append(line)
+            continue
+        res = undefre.match(line)
+        if res is not None:
+            name = res.group('name')
+            if name in macros:
+                del macros[name]
+            if name in funmac:
+                del funmac[name]
+            continue
+        line = process(line, funmac, macros)
+        ndata.append(line)
+    files.remove(f)
+    return ndata
+
+def preprocess(f, printdef=False):
+    return '\n'.join(readfile(f, printdef=printdef))
 
 def normalize(decl):
     decl = spcre.sub(' ', decl) \
@@ -137,4 +175,4 @@ def normalize(decl):
 if __name__ == '__main__':
     import sys
     for f in sys.argv[1:]:
-        print(preprocess(open(f).read(), printdef=True))
+        print(preprocess(f, printdef=True))
diff --git a/testing/listexports.py.in b/testing/listexports.py.in
--- a/testing/listexports.py.in
+++ b/testing/listexports.py.in
@@ -37,9 +37,8 @@ nmere = re.compile(r'\b(?P<name>[a-zA-Z_
 
 def extract(f):
     decls = []
-    data = open(f).read()
 
-    data = exportutils.preprocess(data)
+    data = exportutils.preprocess(f)
 
     res = expre.search(data)
     while res is not None:
diff --git a/testing/malcheck.py b/testing/malcheck.py
--- a/testing/malcheck.py
+++ b/testing/malcheck.py
@@ -82,8 +82,8 @@ odecls = {}
 pdecls = {}
 
 def process(f):
-    data = open(f).read()
     if f.endswith('.mal'):
+        data = open(f).read()
         data = re.sub(r'[ \t]*#.*', '', data) # remove comments
         for res in comreg.finditer(data):
             malf, args, rets, func = res.groups()
@@ -121,7 +121,7 @@ def process(f):
             malf, args, rets, func = res.groups()
             malpats.append((malf, func, f))
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

MonetDB: Oct2020 - Initial version of a melcheck.py program to c...

Reply via email to