We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
2 parents 0556a8c + 8ffefe4 · commit ed48b44 · Copy full SHA for ed48b44
2 files changed
src/textacy/constants.py
@@ -166,7 +166,7 @@
166
RE_ACRONYM: Pattern = re.compile(
167
r"(?:^|(?<=\W))"
168
r"(?:"
169
- r"(?:(?:(?:[A-Z]\.?)+[a-z0-9&/-]?)+(?:[A-Z][s.]?|\ds?))"
+ r"(?:(?:(?:[A-Z]\.?)[a-z0-9&/-]?)+(?:[A-Z][s.]?|\ds?))"
170
r"|"
171
r"(?:\d(?:\-?[A-Z])+)"
172
r")"
src/textacy/preprocessing/resources.py
@@ -68,9 +68,9 @@ def get_text(self) -> str:
68
r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))"
69
70
# host name
71
- r"(?:(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)"
+ r"(?:(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9])"
72
# domain name
73
- r"(?:\.(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)*"
+ r"(?:\.(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9])*"
74
# TLD identifier
75
r"(?:\.(?:[a-z\u00a1-\uffff]{2,}))"
76
0 commit comments