On Fri, Aug 08, 2025 at 04:44:02PM +0000, Marat Khalili wrote: > Thank you for doing this! Very cool script, see couple of nits below. > > > -----Original Message----- > > From: Bruce Richardson <bruce.richard...@intel.com> > > Sent: Friday 8 August 2025 15:27 > > To: dev@dpdk.org > > Cc: Bruce Richardson <bruce.richard...@intel.com> > > Subject: [PATCH 1/2] devtools/mailmap_ctl: script to work with mailmap > > > > Add a script to easily add entries to, check and sort the mailmap file. > > > > Signed-off-by: Bruce Richardson <bruce.richard...@intel.com> > > --- > > devtools/mailmap_ctl.py | 211 ++++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 211 insertions(+) > > create mode 100755 devtools/mailmap_ctl.py > > > > diff --git a/devtools/mailmap_ctl.py b/devtools/mailmap_ctl.py > > new file mode 100755 > > index 0000000000..ffb7bcd69b > > --- /dev/null > > +++ b/devtools/mailmap_ctl.py > > @@ -0,0 +1,211 @@ > > +#!/usr/bin/env python3 > > +# SPDX-License-Identifier: BSD-3-Clause > > +# Copyright(c) 2025 Intel Corporation > > + > > +""" > > +A tool for manipulating the .mailmap file in DPDK repository. 
> > + > > +This script supports three operations: > > +- add: adds a new entry to the mailmap file in the correct position > > +- check: validates mailmap entries are sorted and correctly formatted > > +- sort: sorts the mailmap entries alphabetically by name > > +""" > > + > > +import sys > > +import os > > +import re > > +import argparse > > +import unicodedata > > +from pathlib import Path > > +from dataclasses import dataclass > > +from typing import List, Optional > > + > > + > > +@dataclass > > +class MailmapEntry: > > + """Represents a single mailmap entry.""" > > + > > + name: str > > + name_for_sorting: str > > + email1: str > > + email2: Optional[str] > > + line_number: int > > + > > + def __str__(self) -> str: > > + """Format the entry back to mailmap string format.""" > > + return f"{self.name} <{self.email1}>" + (f" <{self.email2}>" if > > self.email2 else "") > > + > > + @staticmethod > > + def _get_name_for_sorting(name): > > + """Normalize a name for sorting purposes.""" > > + # Remove accents/diacritics. Separate accented chars into two - so > > accent is separate, > > + # then remove the accent. > > + normalized = unicodedata.normalize("NFD", name) > > + normalized = "".join(c for c in normalized if > > unicodedata.category(c) != "Mn") > > + > > + return normalized.lower() > > + > > + @classmethod > > + def parse(cls, line: str, line_number: int) -> > > Optional["MailmapEntry"]: > > + """ > > + Parse a mailmap line and create a MailmapEntry instance. 
> > + > > + Valid formats: > > + - Name <email> > > + - Name <primary_email> <secondary_email> > > + """ > > + line = line.strip() > > + if not line or line.startswith("#"): > > + return None > > + > > + # Pattern to match mailmap entries > > + # Group 1: Name, Group 2: first email, Group 3: optional second > > email > > + pattern = r"^([^<]+?)\s*<([^>]+)>(?:\s*<([^>]+)>)?$" > > + match = re.match(pattern, line) > > + if not match: > > + return None > > + > > + name = match.group(1).strip() > > + primary_email = match.group(2).strip() > > + secondary_email = match.group(3).strip() if match.group(3) else > > None > > + > > + return cls( > > + name=name, > > + name_for_sorting=cls._get_name_for_sorting(name), > > + email1=primary_email, > > + email2=secondary_email, > > + line_number=line_number, > > + ) > > + > > + > > +def read_and_parse_mailmap(mailmap_path: Path) -> List[MailmapEntry]: > > + """Read and parse a mailmap file, returning entries.""" > > + try: > > + with open(mailmap_path, "r", encoding="utf-8") as f: > > + lines = f.readlines() > > + except IOError as e: > > + print(f"Error reading {mailmap_path}: {e}", file=sys.stderr) > > + sys.exit(1) > > + > > + entries = [] > > + line_num = 0 > > + > > + for line in lines: > > + line_num += 1 > > nit: could use `for line_num, line in enumerate(lines, 1)`. > Ack. Will change in V2.
> > + stripped_line = line.strip() > > + > > + # Skip empty lines and comments > > + if not stripped_line or stripped_line.startswith("#"): > > + continue > > + > > + entry = MailmapEntry.parse(stripped_line, line_num) > > + if entry is None: > > + print(f"Line {line_num}: Invalid format - {stripped_line}", > > file=sys.stderr) > > + continue > > Should we fail here instead of continuing? If the operation is check, the > check should not pass. If the operation is add, we probably don't want to > simply remove everything we couldn't parse. > Adding a fail-on-error parameter to the function to handle the two cases. For the "check" op we continue; for all other ops we exit(1). > > + > > + # Check for more than two email addresses > > + if stripped_line.count("<") > 2: > > + print(f"Line {line_num}: Too many email addresses - > > {stripped_line}", file=sys.stderr) > > If this is invalid should we perhaps modify regex to disallow it in > MailmapEntry.parse so that it affects new records as well? > Good point - the regex should already enforce this, because it checks the full entry up to end of line, and only supports an optional second address. A quick test proves this out - when attempting to add a line with 3 email addresses, we get a failure before we reach this point. Therefore, I'll remove this check completely in V2. > > + > > + entries.append(entry) > > + return entries > > + > > + > > +def write_entries_to_file(mailmap_path: Path, entries: List[MailmapEntry]): > > + """Write entries to mailmap file.""" > > + try: > > + with open(mailmap_path, "w", encoding="utf-8") as f: > > + for entry in entries: > > + f.write(str(entry) + "\n") > > + except IOError as e: > > + print(f"Error writing {mailmap_path}: {e}", file=sys.stderr) > > + sys.exit(1) > > + > > + > > +def check_mailmap(mailmap_path, _): > > + """Check that mailmap entries are correctly sorted and formatted.""" > > As noted above, it will not fail if some entries are incorrectly formatted.
> > Also, we could probably check for duplicates. > Yes, but I will leave this for future work, as I don't believe it's a problem we currently have with our mailmap file. > > + entries = read_and_parse_mailmap(mailmap_path) > > + > > + errors = 0 > > + for i in range(1, len(entries)): > > + if entries[i].name_for_sorting < entries[i - 1].name_for_sorting: > > nit: could use `for entry1, entry2 in itertools.pairwise(entries):` > Interesting. Will test this option out. > > + print( > > + f"Line {entries[i].line_number}: Entry '{entries[i].name}' > > is incorrectly sorted", > > + file=sys.stderr, > > + ) > > + errors += 1 > > + > > + if errors: > > + sys.exit(1) > > + > > + > > +def sort_mailmap(mailmap_path, _): > > + """Sort the mailmap entries alphabetically by name.""" > > Should we warn user somewhere that all comments are going to be deleted? > Should we allow comments at all if this is what we do? > Again, in DPDK case, we don't have comments so this is not an issue. However, I'll add a note to the usage details. > > + entries = read_and_parse_mailmap(mailmap_path) > > + > > + entries.sort(key=lambda x: x.name_for_sorting) > > + write_entries_to_file(mailmap_path, entries) > > + > > + > > +def add_entry(mailmap_path, args): > > + """Add a new entry to the mailmap file in the correct alphabetical > > position.""" > > + if not args.entry: > > nit: it is possible to make argparse check it using subparsers or groups. > > > + print("Error: 'add' operation requires an entry argument", > > file=sys.stderr) > > + sys.exit(1) > > + > > + new_entry = MailmapEntry.parse(args.entry, 0) > > + if new_entry is None: > > nit: it is possible to make argparse convert argument to MailmapEntry and > report error to the user in a standard way if it fails, but it will require > some redesign of MailmapEntry so maybe not worth it. > Something to investigate. May not make V2 of this patch. 
> > + print(f"Error: Invalid entry format: {args.entry}", > > file=sys.stderr) > > + sys.exit(1) > > + > > + entries = read_and_parse_mailmap(mailmap_path) > > + > > + # Check if entry already exists, checking email2 only if it's specified > > + if ( > > + not new_entry.email2 > > + and any(e.name == new_entry.name and e.email1 == new_entry.email1 > > for e in entries) > > + ) or any( > > This will usually trigger even when `not new_entry.email2`. > Can you clarify this comment? Is there something I need to fix here? > > + e.name == new_entry.name and e.email1 == new_entry.email1 and > > e.email2 == new_entry.email2 > > + for e in entries > > + ): > > + print( > > + f"Warning: Duplicate entry for '{new_entry.name} > > <{new_entry.email1}>' already exists", > > Probably not a "Warning" if we exit with error code right after. > Good point, I'll change it to an error. > Also the error message is slightly misleading when the second any returns > true. I'd split this into two independent checks each with own error message, > and select between them depending on the presence of new_entry.email2. Only very slightly misleading, IMHO, so I don't think it's worth adding a different error message for the second case. > > > + file=sys.stderr, > > + ) > > + sys.exit(1) > > + > > + entries.append(new_entry) > > + entries.sort(key=lambda x: x.name_for_sorting) > > + write_entries_to_file(mailmap_path, entries) > > + > > + > > +def main(): > > + """Main function.""" > > + parser = argparse.ArgumentParser( > > + description=__doc__, > > formatter_class=argparse.RawDescriptionHelpFormatter > > + ) > > + parser.add_argument("operation", choices=["check", "add", "sort"], > > help="Operation to perform") > > Can we build choices from keys of operations dict? > Yes we can. Nice design change which makes it easier to add new ops in future. 
> > + parser.add_argument("--mailmap", help="Path to .mailmap file (default: > > search up tree)") > > + parser.add_argument("entry", nargs="?", help='Entry to add. Format: > > "Name <email@domain.com>"') > > Secondary email is not mentioned. Actually, if I want to add a secondary > email when I already have primary, what do I do? > You hand-edit for now! :-) That is something that I think we should add in the future, but I'm keeping it simple for now. [For most of the patch application that I do to my tree, it's only adding completely new entries to mailmap, so getting that working was my primary concern.] > > + > > + args = parser.parse_args() > > + > > + if args.mailmap: > > + mailmap_path = Path(args.mailmap) > > + else: > > + # Find mailmap file > > + mailmap_path = Path(".").resolve() > > + while not (mailmap_path / ".mailmap").exists(): > > + if mailmap_path == mailmap_path.parent: > > + print("Error: No .mailmap file found", file=sys.stderr) > > + sys.exit(1) > > + mailmap_path = mailmap_path.parent > > + mailmap_path = mailmap_path / ".mailmap" > > + > > + # Handle operations > > + operations = {"add": add_entry, "check": check_mailmap, "sort": > > sort_mailmap} > > + operations[args.operation](mailmap_path, args) > > + > > + > > +if __name__ == "__main__": > > + main() > > -- > > 2.48.1 >