r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
if r.search(proto):
- decl_type = r.group(1)
- declaration_name = r.group(2)
- members = r.group(3)
+ return (r.group(1), r.group(2), r.group(3))
else:
r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')
-
if r.search(proto):
- decl_type = r.group(1)
- declaration_name = r.group(3)
- members = r.group(2)
-
- if not members:
- self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
- return
-
- if self.entry.identifier != declaration_name:
- self.emit_msg(ln,
- f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n")
- return
-
- args_pattern = r'([^,)]+)'
-
- sub_prefixes = [
- (KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''),
- (KernRe(r'\/\*\s*private:.*', re.S | re.I), ''),
-
- # Strip comments
- (KernRe(r'\/\*.*?\*\/', re.S), ''),
-
- # Strip attributes
- (attribute, ' '),
- (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
- (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
- (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
- (KernRe(r'\s*__packed\s*', re.S), ' '),
- (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
- (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
- (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
-
- # Unwrap struct_group macros based on this definition:
- # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
- # which has variants like: struct_group(NAME, MEMBERS...)
- # Only MEMBERS arguments require documentation.
- #
- # Parsing them happens on two steps:
- #
- # 1. drop struct group arguments that aren't at MEMBERS,
- # storing them as STRUCT_GROUP(MEMBERS)
- #
- # 2. remove STRUCT_GROUP() ancillary macro.
- #
- # The original logic used to remove STRUCT_GROUP() using an
- # advanced regex:
- #
- # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
- #
- # with two patterns that are incompatible with
- # Python re module, as it has:
- #
- # - a recursive pattern: (?1)
- # - an atomic grouping: (?>...)
- #
- # I tried a simpler version: but it didn't work either:
- # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
- #
- # As it doesn't properly match the end parenthesis on some cases.
- #
- # So, a better solution was crafted: there's now a NestedMatch
- # class that ensures that delimiters after a search are properly
- # matched. So, the implementation to drop STRUCT_GROUP() will be
- # handled in separate.
-
- (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),
- (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('),
- (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('),
- (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('),
-
- # Replace macros
- #
- # TODO: use NestedMatch for FOO($1, $2, ...) matches
- #
- # it is better to also move those to the NestedMatch logic,
- # to ensure that parenthesis will be properly matched.
-
- (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
- (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
- (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
- (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
- (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'),
- (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'),
- (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'),
- (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'),
- (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'),
- (KernRe(r'VIRTIO_DECLARE_FEATURES\s*\(' + args_pattern + r'\)', re.S), r'u64 \1; u64 \1_array[VIRTIO_FEATURES_DWORDS]'),
- ]
-
- # Regexes here are guaranteed to have the end limiter matching
- # the start delimiter. Yet, right now, only one replace group
- # is allowed.
-
- sub_nested_prefixes = [
- (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
- ]
-
- for search, sub in sub_prefixes:
- members = search.sub(sub, members)
-
- nested = NestedMatch()
-
- for search, sub in sub_nested_prefixes:
- members = nested.sub(search, sub, members)
-
- # Keeps the original declaration as-is
- declaration = members
-
- # Split nested struct/union elements
- #
- # This loop was simpler at the original kernel-doc perl version, as
- # while ($members =~ m/$struct_members/) { ... }
- # reads 'members' string on each interaction.
- #
- # Python behavior is different: it parses 'members' only once,
- # creating a list of tuples from the first interaction.
+ return (r.group(1), r.group(3), r.group(2))
+ return None
-
+ #
+ # Rewrite the members of a structure or union for easier formatting later on.
+ # Among other things, this function will turn a member like:
+ #
+ # struct { inner_members; } foo;
+ #
+ # into:
+ #
+ # struct foo; inner_members;
+ #
+ def rewrite_struct_members(self, members):
#
- # On other words, this won't get nested structs.
+ # Process struct/union members from the most deeply nested outward. The
+ # trick is in the ^{ below - it prevents a match of an outer struct/union
+ # until the inner one has been munged (removing the "{" in the process).
#
- # So, we need to have an extra loop on Python to override such
- # re limitation.
-
- while True:
- tuples = struct_members.findall(members)
- if not tuples:
- break
-
+ struct_members = KernRe(r'(struct|union)' # 0: declaration type
+ r'([^\{\};]+)' # 1: possible name
+ r'(\{)'
+ r'([^\{\}]*)' # 3: Contents of declaration
+ r'(\})'
+ r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration
+ tuples = struct_members.findall(members)
+ while tuples:
for t in tuples:
newmember = ""
- maintype = t[0]
- s_ids = t[5]
- content = t[3]
-
- oldmember = "".join(t)
-
- for s_id in s_ids.split(','):
+ oldmember = "".join(t) # Reconstruct the original formatting
+ dtype, name, lbr, content, rbr, rest, semi = t
+ #
+ # Pass through each field name, normalizing the form and formatting.
+ #
+ for s_id in rest.split(','):
s_id = s_id.strip()
-
- newmember += f"{maintype} {s_id}; "
+ newmember += f"{dtype} {s_id}; "
+ #
+ # Remove bitfield/array/pointer info, getting the bare name.
+ #
s_id = KernRe(r'[:\[].*').sub('', s_id)
s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
-
+ #
+ # Pass through the members of this inner structure/union.
+ #
for arg in content.split(';'):
arg = arg.strip()
-
- if not arg:
- continue
-
- r = KernRe(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)')
+ #
+ # Look for (type)(*name)(args) - pointer to function
+ #
+ r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')
if r.match(arg):
+ dtype, name, extra = r.group(1), r.group(2), r.group(3)
# Pointer-to-function
- dtype = r.group(1)
- name = r.group(2)
- extra = r.group(3)
-
- if not name:
- continue
-
if not s_id:
# Anonymous struct/union
newmember += f"{dtype}{name}{extra}; "