From: Jonathan Corbet <corbet@lwn.net>
Date: Mon, 11 Aug 2025 16:29:07 +0000 (-0600)
Subject: Merge branch 'dump-struct' into docs-mw
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=7b41f6f9371b90052c99afd655140c8f6bfb18cf;p=users%2Fhch%2Fmisc.git

Merge branch 'dump-struct' into docs-mw

In my ongoing effort to truly understand our new kernel-doc, I continue to
make changes to improve the code, and to try to make the understanding task
easier for the next person.  These patches focus on dump_struct() in
particular, which starts out at nearly 300 lines long - too much to fit into
my little brain anyway.  Hopefully the result is easier to manage.

There are no changes in the rendered docs.
---

7b41f6f9371b90052c99afd655140c8f6bfb18cf
diff --cc scripts/lib/kdoc/kdoc_parser.py
index fe730099eca8,9b21fb86709a..9e65948f8254
--- a/scripts/lib/kdoc/kdoc_parser.py
+++ b/scripts/lib/kdoc/kdoc_parser.py
@@@ -539,177 -640,64 +640,63 @@@ class KernelDoc
  
          r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
          if r.search(proto):
-             decl_type = r.group(1)
-             declaration_name = r.group(2)
-             members = r.group(3)
+             return (r.group(1), r.group(2), r.group(3))
          else:
              r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')
- 
              if r.search(proto):
-                 decl_type = r.group(1)
-                 declaration_name = r.group(3)
-                 members = r.group(2)
- 
-         if not members:
-             self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
-             return
- 
-         if self.entry.identifier != declaration_name:
-             self.emit_msg(ln,
-                           f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n")
-             return
- 
-         args_pattern = r'([^,)]+)'
- 
-         sub_prefixes = [
-             (KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''),
-             (KernRe(r'\/\*\s*private:.*', re.S | re.I), ''),
- 
-             # Strip comments
-             (KernRe(r'\/\*.*?\*\/', re.S), ''),
- 
-             # Strip attributes
-             (attribute, ' '),
-             (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
-             (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
-             (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
-             (KernRe(r'\s*__packed\s*', re.S), ' '),
-             (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
-             (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
-             (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
- 
-             # Unwrap struct_group macros based on this definition:
-             # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
-             # which has variants like: struct_group(NAME, MEMBERS...)
-             # Only MEMBERS arguments require documentation.
-             #
-             # Parsing them happens on two steps:
-             #
-             # 1. drop struct group arguments that aren't at MEMBERS,
-             #    storing them as STRUCT_GROUP(MEMBERS)
-             #
-             # 2. remove STRUCT_GROUP() ancillary macro.
-             #
-             # The original logic used to remove STRUCT_GROUP() using an
-             # advanced regex:
-             #
-             #   \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
-             #
-             # with two patterns that are incompatible with
-             # Python re module, as it has:
-             #
-             #   - a recursive pattern: (?1)
-             #   - an atomic grouping: (?>...)
-             #
-             # I tried a simpler version: but it didn't work either:
-             #   \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
-             #
-             # As it doesn't properly match the end parenthesis on some cases.
-             #
-             # So, a better solution was crafted: there's now a NestedMatch
-             # class that ensures that delimiters after a search are properly
-             # matched. So, the implementation to drop STRUCT_GROUP() will be
-             # handled in separate.
- 
-             (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),
-             (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('),
-             (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('),
-             (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('),
- 
-             # Replace macros
-             #
-             # TODO: use NestedMatch for FOO($1, $2, ...) matches
-             #
-             # it is better to also move those to the NestedMatch logic,
-             # to ensure that parenthesis will be properly matched.
- 
-             (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
-             (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
-             (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
-             (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
-             (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'),
-             (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'),
-             (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'),
-             (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'),
-             (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'),
-             (KernRe(r'VIRTIO_DECLARE_FEATURES\s*\(' + args_pattern + r'\)', re.S), r'u64 \1; u64 \1_array[VIRTIO_FEATURES_DWORDS]'),
-         ]
- 
-         # Regexes here are guaranteed to have the end limiter matching
-         # the start delimiter. Yet, right now, only one replace group
-         # is allowed.
- 
-         sub_nested_prefixes = [
-             (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
-         ]
- 
-         for search, sub in sub_prefixes:
-             members = search.sub(sub, members)
- 
-         nested = NestedMatch()
- 
-         for search, sub in sub_nested_prefixes:
-             members = nested.sub(search, sub, members)
- 
-         # Keeps the original declaration as-is
-         declaration = members
- 
-         # Split nested struct/union elements
-         #
-         # This loop was simpler at the original kernel-doc perl version, as
-         #   while ($members =~ m/$struct_members/) { ... }
-         # reads 'members' string on each interaction.
-         #
-         # Python behavior is different: it parses 'members' only once,
-         # creating a list of tuples from the first interaction.
+                 return (r.group(1), r.group(3), r.group(2))
+         return None
 -
+     #
+     # Rewrite the members of a structure or union for easier formatting later on.
+     # Among other things, this function will turn a member like:
+     #
+     #  struct { inner_members; } foo;
+     #
+     # into:
+     #
+     #  struct foo; inner_members;
+     #
+     def rewrite_struct_members(self, members):
          #
-         # On other words, this won't get nested structs.
+         # Process struct/union members from the most deeply nested outward.  The
+         # trick is in the ^{ below - it prevents a match of an outer struct/union
+         # until the inner one has been munged (removing the "{" in the process).
          #
-         # So, we need to have an extra loop on Python to override such
-         # re limitation.
- 
-         while True:
-             tuples = struct_members.findall(members)
-             if not tuples:
-                 break
- 
+         struct_members = KernRe(r'(struct|union)'   # 0: declaration type
+                                 r'([^\{\};]+)' 	    # 1: possible name
+                                 r'(\{)'
+                                 r'([^\{\}]*)'       # 3: Contents of declaration
+                                 r'(\})'
+                                 r'([^\{\};]*)(;)')  # 5: Remaining stuff after declaration
+         tuples = struct_members.findall(members)
+         while tuples:
              for t in tuples:
                  newmember = ""
-                 maintype = t[0]
-                 s_ids = t[5]
-                 content = t[3]
- 
-                 oldmember = "".join(t)
- 
-                 for s_id in s_ids.split(','):
+                 oldmember = "".join(t) # Reconstruct the original formatting
+                 dtype, name, lbr, content, rbr, rest, semi = t
+                 #
+                 # Pass through each field name, normalizing the form and formatting.
+                 #
+                 for s_id in rest.split(','):
                      s_id = s_id.strip()
- 
-                     newmember += f"{maintype} {s_id}; "
+                     newmember += f"{dtype} {s_id}; "
+                     #
+                     # Remove bitfield/array/pointer info, getting the bare name.
+                     #
                      s_id = KernRe(r'[:\[].*').sub('', s_id)
                      s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
- 
+                     #
+                     # Pass through the members of this inner structure/union.
+                     #
                      for arg in content.split(';'):
                          arg = arg.strip()
- 
-                         if not arg:
-                             continue
- 
-                         r = KernRe(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)')
+                         #
+                         # Look for (type)(*name)(args) - pointer to function
+                         #
+                         r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')
                          if r.match(arg):
+                             dtype, name, extra = r.group(1), r.group(2), r.group(3)
                              # Pointer-to-function
-                             dtype = r.group(1)
-                             name = r.group(2)
-                             extra = r.group(3)
- 
-                             if not name:
-                                 continue
- 
                              if not s_id:
                                  # Anonymous struct/union
                                  newmember += f"{dtype}{name}{extra}; "