> -----Original Message----- > From: Intel-wired-lan <[email protected]> On Behalf > Of Mauro Carvalho Chehab > Sent: Thursday, January 29, 2026 9:08 AM > To: Jonathan Corbet <[email protected]>; Linux Doc Mailing List <linux- > [email protected]> > Cc: Mauro Carvalho Chehab <[email protected]>; > [email protected]; [email protected]; linux- > [email protected]; [email protected]; > [email protected]; Gustavo A. R. Silva <[email protected]>; > Kees Cook <[email protected]>; Mauro Carvalho Chehab > <[email protected]>; Randy Dunlap <[email protected]>; Shuah Khan > <[email protected]> > Subject: [Intel-wired-lan] [PATCH v3 28/30] docs: kdoc_parser: move > transform lists to a separate file > > Over time, most of the changes to kernel-doc are related to > maintaining a list of transforms to convert macros into pure C code. > > Place such transforms in a separate module, to clean up the parser > module. > > While here, drop the now obsolete comment about the two-step logic to > handle struct_group macros. > > There is an advantage to that: QEMU also uses our own kernel-doc, but > the xforms list there is different. By placing it in a separate > module, we can minimize the differences and make it easier to keep > QEMU in sync with Kernel upstream. 
> > Signed-off-by: Mauro Carvalho Chehab <[email protected]> > --- > Documentation/tools/kdoc_parser.rst | 8 ++ > tools/lib/python/kdoc/kdoc_files.py | 3 +- > tools/lib/python/kdoc/kdoc_parser.py | 147 ++----------------------- > - tools/lib/python/kdoc/xforms_lists.py | 117 ++++++++++++++++++++ > 4 files changed, 133 insertions(+), 142 deletions(-) create mode > 100644 tools/lib/python/kdoc/xforms_lists.py > > diff --git a/Documentation/tools/kdoc_parser.rst > b/Documentation/tools/kdoc_parser.rst > index 03ee54a1b1cc..55b202173195 100644 > --- a/Documentation/tools/kdoc_parser.rst > +++ b/Documentation/tools/kdoc_parser.rst > @@ -4,6 +4,14 @@ > Kernel-doc parser stage > ======================= > > +C replacement rules used by the parser > +====================================== > + > +.. automodule:: lib.python.kdoc.xforms_lists > + :members: > + :show-inheritance: > + :undoc-members: > + > File handler classes > ==================== > > diff --git a/tools/lib/python/kdoc/kdoc_files.py > b/tools/lib/python/kdoc/kdoc_files.py > index 022487ea2cc6..7357c97a4b01 100644 > --- a/tools/lib/python/kdoc/kdoc_files.py > +++ b/tools/lib/python/kdoc/kdoc_files.py > @@ -15,6 +15,7 @@ import os > import re > > from kdoc.kdoc_parser import KernelDoc > +from kdoc.xforms_lists import CTransforms > from kdoc.kdoc_output import OutputFormat > > > @@ -117,7 +118,7 @@ class KernelFiles(): > if fname in self.files: > return > > - doc = KernelDoc(self.config, fname) > + doc = KernelDoc(self.config, fname, CTransforms) > export_table, entries = doc.parse_kdoc() > > self.export_table[fname] = export_table diff --git > a/tools/lib/python/kdoc/kdoc_parser.py > b/tools/lib/python/kdoc/kdoc_parser.py > index 1e8e156e2a9e..a280fe581937 100644 > --- a/tools/lib/python/kdoc/kdoc_parser.py > +++ b/tools/lib/python/kdoc/kdoc_parser.py > @@ -75,142 +75,6 @@ doc_begin_func = KernRe(str(doc_com) + > # initial " * ' > # > struct_args_pattern = r'([^,)]+)' > > -struct_xforms = [ > - # Strip 
attributes > - (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", > flags=re.I | re.S, cache=False), ' '), > - (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), > - (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), > - (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), > - (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ' '), > - (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ' '), > - (KernRe(r'\s*__packed\s*', re.S), ' '), > - (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), > - (KernRe(r'\s*__private', re.S), ' '), > - (KernRe(r'\s*__rcu', re.S), ' '), > - (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), > - (KernRe(r'\s*____cacheline_aligned', re.S), ' '), > - (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), > - # > - # Unwrap struct_group macros based on this definition: > - # __struct_group(TAG, NAME, ATTRS, MEMBERS...) > - # which has variants like: struct_group(NAME, MEMBERS...) > - # Only MEMBERS arguments require documentation. > - # > - # Parsing them happens on two steps: > - # > - # 1. drop struct group arguments that aren't at MEMBERS, > - # storing them as STRUCT_GROUP(MEMBERS) > - # > - # 2. remove STRUCT_GROUP() ancillary macro. > - # > - # The original logic used to remove STRUCT_GROUP() using an > - # advanced regex: > - # > - # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; > - # > - # with two patterns that are incompatible with > - # Python re module, as it has: > - # > - # - a recursive pattern: (?1) > - # - an atomic grouping: (?>...) > - # > - # I tried a simpler version: but it didn't work either: > - # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; > - # > - # As it doesn't properly match the end parenthesis on some cases. > - # > - # So, a better solution was crafted: there's now a CFunction > - # class that ensures that delimiters after a search are properly > - # matched. So, the implementation to drop STRUCT_GROUP() will be > - # handled in separate. 
> - # > - (CFunction('struct_group'), r'\2'), > - (CFunction('struct_group_attr'), r'\3'), > - (CFunction('struct_group_tagged'), r'struct \1 \2; \3'), > - (CFunction('__struct_group'), r'\4'), > - > - # > - # Replace macros > - # > - # TODO: use CFunction on all FOO($1, $2, ...) matches > - # > - # it is better to also move those to the CFunction logic, > - # to ensure that parentheses will be properly matched. > - # > - (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', > re.S), > - r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), > - (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), > - r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), > - (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + > struct_args_pattern + r'\)', > - re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), > - (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' > + struct_args_pattern + r'\)', > - re.S), r'unsigned long \1[1 << ((\2) - 1)]'), > - (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + > struct_args_pattern + > - r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), > - (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' > + > - struct_args_pattern + r'\)', re.S), r'\2 *\1'), > - (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + > r',\s*' + > - struct_args_pattern + r'\)', re.S), r'\1 \2[]'), > - (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + > r'\)', re.S), r'dma_addr_t \1'), > - (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + > r'\)', re.S), r'__u32 \1'), > - (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 > \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), > -] > - > -# > -# Transforms for function prototypes > -# > -function_xforms = [ > - (KernRe(r"^static +"), ""), > - (KernRe(r"^extern +"), ""), > - (KernRe(r"^asmlinkage +"), ""), > - (KernRe(r"^inline +"), ""), > - (KernRe(r"^__inline__ +"), ""), > - (KernRe(r"^__inline +"), ""), > - 
(KernRe(r"^__always_inline +"), ""), > - (KernRe(r"^noinline +"), ""), > - (KernRe(r"^__FORTIFY_INLINE +"), ""), > - (KernRe(r"__init +"), ""), > - (KernRe(r"__init_or_module +"), ""), > - (KernRe(r"__deprecated +"), ""), > - (KernRe(r"__flatten +"), ""), > - (KernRe(r"__meminit +"), ""), > - (KernRe(r"__must_check +"), ""), > - (KernRe(r"__weak +"), ""), > - (KernRe(r"__sched +"), ""), > - (KernRe(r"_noprof"), ""), > - (KernRe(r"__always_unused *"), ""), > - (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), > - (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), > ""), > - (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), > - (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), > r"\1, \2"), > - (KernRe(r"__no_context_analysis\s*"), ""), > - (KernRe(r"__attribute_const__ +"), ""), > - (CFunction("__cond_acquires"), ""), > - (CFunction("__cond_releases"), ""), > - (CFunction("__acquires"), ""), > - (CFunction("__releases"), ""), > - (CFunction("__must_hold"), ""), > - (CFunction("__must_not_hold"), ""), > - (CFunction("__must_hold_shared"), ""), > - (CFunction("__cond_acquires_shared"), ""), > - (CFunction("__acquires_shared"), ""), > - (CFunction("__releases_shared"), ""), > - (CFunction("__attribute__"), ""), > -] > - > -# > -# Transforms for variable prototypes > -# > -var_xforms = [ > - (KernRe(r"__read_mostly"), ""), > - (KernRe(r"__ro_after_init"), ""), > - (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ""), > - (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ""), > - (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"), > - (KernRe(r"(?://.*)$"), ""), > - (KernRe(r"(?:/\*.*\*/)"), ""), > - (KernRe(r";$"), ""), > -] > > # > # Ancillary functions > @@ -394,11 +258,12 @@ class KernelDoc: > #: String to write when a parameter is not described. 
> undescribed = "-- undescribed --" > > - def __init__(self, config, fname): > + def __init__(self, config, fname, xforms): > """Initialize internal variables""" > > self.fname = fname > self.config = config > + self.xforms = xforms > > # Initial state for the state machines > self.state = state.NORMAL > @@ -889,7 +754,7 @@ class KernelDoc: > # Go through the list of members applying all of our > transformations. > # > members = trim_private_members(members) > - members = self.apply_transforms(struct_xforms, members) > + members = self.apply_transforms(self.xforms.struct_xforms, > + members) > > # > # Deal with embedded struct and union members, and drop enums > entirely. > @@ -1011,8 +876,7 @@ class KernelDoc: > # Drop comments and macros to have a pure C prototype > # > if not declaration_name: > - for r, sub in var_xforms: > - proto = r.sub(sub, proto) > + proto = self.apply_transforms(self.xforms.var_xforms, > + proto) > > proto = proto.rstrip() > > @@ -1104,7 +968,8 @@ class KernelDoc: > # > # Apply the initial transformations. > # > - prototype = self.apply_transforms(function_xforms, > prototype) > + prototype = > self.apply_transforms(self.xforms.function_xforms, > + prototype) > > # Yes, this truly is vile. We are looking for: > # 1. Return type (may be nothing if we're looking at a macro) > diff --git a/tools/lib/python/kdoc/xforms_lists.py > b/tools/lib/python/kdoc/xforms_lists.py > new file mode 100644 > index 000000000000..88968bafdb78 > --- /dev/null > +++ b/tools/lib/python/kdoc/xforms_lists.py > @@ -0,0 +1,117 @@ > +#!/usr/bin/env python3 > +# SPDX-License-Identifier: GPL-2.0 > +# Copyright(c) 2026: Mauro Carvalho Chehab <[email protected]>. 
> + > +import re > + > +from kdoc.kdoc_re import CFunction, KernRe > + > +struct_args_pattern = r'([^,)]+)' > + > +class CTransforms: > + """ > + Data class containing a long set of transformations to turn > + structure member prefixes, and macro invocations and variables > + into something we can parse and generate kdoc for. > + """ > + > + #: Transforms for structs and unions > + struct_xforms = [ > + # Strip attributes > + (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", > flags=re.I | re.S, cache=False), ' '), > + (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), > + (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), > + (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), > + (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ' '), > + (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ' '), > + (KernRe(r'\s*__packed\s*', re.S), ' '), > + (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), > + (KernRe(r'\s*__private', re.S), ' '), > + (KernRe(r'\s*__rcu', re.S), ' '), > + (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), > + (KernRe(r'\s*____cacheline_aligned', re.S), ' '), > + (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), > + > + (CFunction('struct_group'), r'\2'), > + (CFunction('struct_group_attr'), r'\3'), > + (CFunction('struct_group_tagged'), r'struct \1 \2; \3'), > + (CFunction('__struct_group'), r'\4'), > + > + # > + # Replace macros > + # > + # TODO: use CFunction on all FOO($1, $2, ...) matches > + # > + # it is better to also move those to the CFunction logic, > + # to ensure that parentheses will be properly matched. 
> + # > + (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', > re.S), > + r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), > + (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), > + r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), > + (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + > r',\s*' + struct_args_pattern + r'\)', > + re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), > + (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + > r',\s*' + struct_args_pattern + r'\)', > + re.S), r'unsigned long \1[1 << ((\2) - 1)]'), > + (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' > + struct_args_pattern + > + r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 > *\1'), > + (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + > r',\s*' + > + struct_args_pattern + r'\)', re.S), r'\2 *\1'), > + (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + > struct_args_pattern + r',\s*' + > + struct_args_pattern + r'\)', re.S), r'\1 \2[]'), > + (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + > r'\)', re.S), r'dma_addr_t \1'), > + (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + > r'\)', re.S), r'__u32 \1'), > + (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { > u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), > + ] > + > + #: Transforms for function prototypes > + function_xforms = [ > + (KernRe(r"^static +"), ""), > + (KernRe(r"^extern +"), ""), > + (KernRe(r"^asmlinkage +"), ""), > + (KernRe(r"^inline +"), ""), > + (KernRe(r"^__inline__ +"), ""), > + (KernRe(r"^__inline +"), ""), > + (KernRe(r"^__always_inline +"), ""), > + (KernRe(r"^noinline +"), ""), > + (KernRe(r"^__FORTIFY_INLINE +"), ""), > + (KernRe(r"__init +"), ""), > + (KernRe(r"__init_or_module +"), ""), > + (KernRe(r"__deprecated +"), ""), > + (KernRe(r"__flatten +"), ""), > + (KernRe(r"__meminit +"), ""), > + (KernRe(r"__must_check +"), ""), > + (KernRe(r"__weak +"), ""), > + (KernRe(r"__sched +"), ""), > + (KernRe(r"_noprof"), ""), > + 
(KernRe(r"__always_unused *"), ""), > + (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), > + (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) > +"), ""), > + (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), > ""), > + (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), > r"\1, \2"), > + (KernRe(r"__no_context_analysis\s*"), ""), > + (KernRe(r"__attribute_const__ +"), ""), > + (CFunction("__cond_acquires"), ""), > + (CFunction("__cond_releases"), ""), > + (CFunction("__acquires"), ""), > + (CFunction("__releases"), ""), > + (CFunction("__must_hold"), ""), > + (CFunction("__must_not_hold"), ""), > + (CFunction("__must_hold_shared"), ""), > + (CFunction("__cond_acquires_shared"), ""), > + (CFunction("__acquires_shared"), ""), > + (CFunction("__releases_shared"), ""), > + (CFunction("__attribute__"), ""), > + ] > + > + #: Transforms for variables > + var_xforms = [ > + (KernRe(r"__read_mostly"), ""), > + (KernRe(r"__ro_after_init"), ""), > + (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ""), > + (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ""), > + (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"), > + (KernRe(r"(?://.*)$"), ""), > + (KernRe(r"(?:/\*.*\*/)"), ""), > + (KernRe(r";$"), ""), > + ] > -- > 2.52.0
Reviewed-by: Aleksandr Loktionov <[email protected]>
