www.infradead.org Git - users/willy/linux.git/commitdiff
docs: kdoc: rework type prototype parsing
author: Jonathan Corbet <corbet@lwn.net>
Thu, 3 Jul 2025 18:44:00 +0000 (12:44 -0600)
committer: Jonathan Corbet <corbet@lwn.net>
Tue, 8 Jul 2025 14:06:25 +0000 (08:06 -0600)
process_proto_type() is using a complex regex and a "while True" loop to
split a declaration into chunks and, in the end, count brackets.  Switch to
using a simpler regex to just do the split directly, and handle each chunk
as it comes.  The result is, IMO, easier to understand and reason about.

The old algorithm would occasionally elide the space between function
parameters; see struct rng_alg->generate(), for example.  The only output
difference is to not elide that space, which is more correct.

Reviewed-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Tested-by: Akira Yokosawa <akiyks@gmail.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Link: https://lore.kernel.org/r/20250703184403.274408-5-corbet@lwn.net
scripts/lib/kdoc/kdoc_parser.py

index 935f2a3c4b478053cfe29b7d4847c646c06cdab6..61da297df623dc60e1763d9b358c072f3dbe8cee 100644 (file)
@@ -1594,30 +1594,37 @@ class KernelDoc:
 
         # Strip C99-style comments and surrounding whitespace
         line = KernRe(r"//.*$", re.S).sub('', line).strip()
+        if not line:
+            return # nothing to see here
 
         # To distinguish preprocessor directive from regular declaration later.
         if line.startswith('#'):
             line += ";"
-
-        r = KernRe(r'([^\{\};]*)([\{\};])(.*)')
-        while True:
-            if r.search(line):
-                if self.entry.prototype:
-                    self.entry.prototype += " "
-                self.entry.prototype += r.group(1) + r.group(2)
-
-                self.entry.brcount += r.group(2).count('{')
-                self.entry.brcount -= r.group(2).count('}')
-
-                if r.group(2) == ';' and self.entry.brcount <= 0:
+        #
+        # Split the declaration on any of { } or ;, and accumulate pieces
+        # until we hit a semicolon while not inside {brackets}
+        #
+        r = KernRe(r'(.*?)([{};])')
+        for chunk in r.split(line):
+            if chunk:  # Ignore empty matches
+                self.entry.prototype += chunk
+                #
+                # This cries out for a match statement ... someday after we can
+                # drop Python 3.9 ...
+                #
+                if chunk == '{':
+                    self.entry.brcount += 1
+                elif chunk == '}':
+                    self.entry.brcount -= 1
+                elif chunk == ';' and self.entry.brcount <= 0:
                     self.dump_declaration(ln, self.entry.prototype)
                     self.reset_state(ln)
-                    break
-
-                line = r.group(3)
-            else:
-                self.entry.prototype += line
-                break
+                    return
+        #
+        # We hit the end of the line while still in the declaration; put
+        # in a space to represent the newline.
+        #
+        self.entry.prototype += ' '
 
     def process_proto(self, ln, line):
         """STATE_PROTO: reading a function/whatever prototype."""