-
-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Fix PEP 0 name parsing #1386
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix PEP 0 name parsing #1386
Changes from 1 commit
545bea8
90bbb4c
a1013ce
552a7b6
7a0b5b5
3c6520d
ee33701
efdaf15
8f9db05
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -62,34 +62,38 @@ class Author(object): | |
|
||
def __init__(self, author_and_email_tuple): | ||
"""Parse the name and email address of an author.""" | ||
self.first = self.last = '' | ||
|
||
name, email = author_and_email_tuple | ||
self.first_last = name.strip() | ||
self.email = email.lower() | ||
last_name_fragment, suffix = self._last_name(name) | ||
name_sep = name.index(last_name_fragment) | ||
self.first = name[:name_sep].rstrip() | ||
self.last = last_name_fragment | ||
if self.last[1] == u'.': | ||
# Add an escape to avoid docutils turning `v.` into `22.`. | ||
self.last = u'\\' + self.last | ||
self.suffix = suffix | ||
if not self.first: | ||
self.last_first = self.last | ||
|
||
name_dict = self._parse_name(name) | ||
self.suffix = name_dict.get("suffix") | ||
if name_dict.get("name"): | ||
self.last_first = name_dict["name"] | ||
self.nick = name_dict["name"] | ||
else: | ||
self.last_first = u', '.join([self.last, self.first]) | ||
if self.suffix: | ||
self.last_first += u', ' + self.suffix | ||
self.first = name_dict["forename"].rstrip() | ||
self.last = name_dict["surname"] | ||
if self.last[1] == ".": | ||
# Add an escape to avoid docutils turning `v.` into `22.`. | ||
self.last = "\\" + self.last | ||
self.last_first = ", ".join([self.last, self.first]) | ||
self.nick = self.last | ||
|
||
if self.suffix: | ||
self.last_first += ", " + self.suffix | ||
|
||
if self.last == "van Rossum": | ||
# Special case for our beloved BDFL. :) | ||
if self.first == "Guido": | ||
self.nick = "GvR" | ||
elif self.first == "Just": | ||
self.nick = "JvR" | ||
AA-Turner marked this conversation as resolved.
Show resolved
Hide resolved
|
||
else: | ||
raise ValueError("unknown van Rossum %r!" % self) | ||
self.last_first += " (%s)" % (self.nick,) | ||
else: | ||
self.nick = self.last | ||
raise ValueError(f"unknown van Rossum {self}!") | ||
AA-Turner marked this conversation as resolved.
Show resolved
Hide resolved
|
||
self.last_first += f" ({self.nick})" | ||
|
||
def __hash__(self): | ||
return hash(self.first_last) | ||
|
@@ -109,28 +113,64 @@ def sort_by(self): | |
base = self.last.lower() | ||
return unicodedata.normalize('NFKD', base).encode('ASCII', 'ignore') | ||
|
||
def _last_name(self, full_name): | ||
"""Find the last name (or nickname) of a full name. | ||
@staticmethod | ||
def _parse_name(full_name): | ||
"""Decompose a full name into parts. | ||
|
||
If no last name (e.g, 'Aahz') then return the full name. If there is | ||
a leading, lowercase portion to the last name (e.g., 'van' or 'von') | ||
then include it. If there is a suffix (e.g., 'Jr.') that is appended | ||
through a comma, then drop the suffix. | ||
If a mononym (e.g, 'Aahz') then return the full name. If there are | ||
suffixes in the name (e.g. ', Jr.' or 'III'), then find and extract | ||
them. If there is a middle initial followed by a full stop, then | ||
combine the following words into a surname (e.g. N. Vander Weele). If | ||
there is a leading, lowercase portion to the last name (e.g. 'van' or | ||
'von') then include it in the surname. | ||
|
||
""" | ||
name_partition = full_name.partition(u',') | ||
no_suffix = name_partition[0].strip() | ||
suffix = name_partition[2].strip() | ||
name_parts = no_suffix.split() | ||
part_count = len(name_parts) | ||
if part_count == 1 or part_count == 2: | ||
return name_parts[-1], suffix | ||
else: | ||
assert part_count > 2 | ||
possible_suffixes = ["Jr", "Jr.", "II", "III"] | ||
special_cases = ["The Python core team and community"] | ||
|
||
if full_name in special_cases: | ||
return {"name": full_name} | ||
|
||
suffix_partition = full_name.partition(",") | ||
pre_suffix = suffix_partition[0].strip() | ||
suffix = suffix_partition[2].strip() | ||
|
||
name_parts = pre_suffix.split(" ") | ||
num_parts = len(name_parts) | ||
name = {"suffix": suffix} | ||
|
||
if num_parts == 0: | ||
raise ValueError("Name is empty!") | ||
elif num_parts == 1: | ||
name.update(name=name_parts[0]) | ||
elif num_parts == 2: | ||
name.update(forename=name_parts[0], surname=name_parts[1]) | ||
elif num_parts > 2: | ||
# handles III etc. | ||
if name_parts[-1] in possible_suffixes: | ||
new_suffix = " ".join([*name_parts[-1:], suffix]).strip() | ||
name_parts.pop(-1) | ||
name.update(suffix=new_suffix) | ||
|
||
# handles von, van, v. etc. | ||
if name_parts[-2].islower(): | ||
return u' '.join(name_parts[-2:]), suffix | ||
forename = " ".join(name_parts[:-2]) | ||
surname = " ".join(name_parts[-2:]) | ||
name.update(forename=forename, surname=surname) | ||
|
||
# handles double surnames after a middle initial (e.g. N. Vander Weele) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is tough, a name like That said, a name written in Japanese English convention like I guess we have to accept imperfect for now, add special cases when we notice problems (what PEP will add the first There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Luckily (?!) in PEP 458
Or UK, Australia, Canada etc 😜. But I get your point. I'm reminded of this post about names - hopefully #40 doesn't apply to us... I think that your last suggestion having some sort of lookup table is probably the best solution, as in all the PEPs there are still only a relatively small number of authors (248) - it's quite late here so will add that feature tomorrow. It also keeps special cases etc. out of the code to keep it from becoming knobbly. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be OK if this PEP fixed the most egregious cases ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Gives me something to do! Latest commit adds such a metadata lookup and therefore simplifies the name parsing code. This should make it so that names can be correctly entered into AUTHORS.csv and PEP 0 will reflect this. I've also identified some duplicate entries (e.g. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think I would add multiple entries to the data file rather than editing historical documents. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The latest data file (exceptions rather than full mapping) doesn’t de-duplicate these entries, should it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The data file is checked first (in init), so unsure where duplicates would propagate from? Always good to be preventative but not sure I understand this one, sorry! There was a problem hiding this comment. 
Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe my comment doesn’t make sense! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Aha! I forgot to add that back in, you're right - have done so now (only adding the less used variant and mapping it to the 'canonical' variant, to keep the file smaller) |
||
elif any(s.endswith(".") for s in name_parts): | ||
split_position = [i for i, x in enumerate(name_parts) if x.endswith(".")][-1] + 1 | ||
forename = " ".join(name_parts[:split_position]) | ||
surname = " ".join(name_parts[split_position:]) | ||
name.update(forename=forename, surname=surname) | ||
|
||
else: | ||
return name_parts[-1], suffix | ||
forename = " ".join(name_parts[:-1]) | ||
surname = " ".join(name_parts[-1:]) | ||
name.update(forename=forename, surname=surname) | ||
|
||
return name | ||
|
||
|
||
class PEP(object): | ||
|
Uh oh!
There was an error while loading. Please reload this page.