Skip to content

Commit ed48b44

Browse files
authored
Merge pull request #371 from kevinbackhouse/fix-redos
Fix ReDoS bugs
2 parents 0556a8c + 8ffefe4 commit ed48b44

2 files changed

Lines changed: 3 additions & 3 deletions

File tree

src/textacy/constants.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -166,7 +166,7 @@
166 166
RE_ACRONYM: Pattern = re.compile(
167 167
r"(?:^|(?<=\W))"
168 168
r"(?:"
169-
r"(?:(?:(?:[A-Z]\.?)+[a-z0-9&/-]?)+(?:[A-Z][s.]?|\ds?))"
169+
r"(?:(?:(?:[A-Z]\.?)[a-z0-9&/-]?)+(?:[A-Z][s.]?|\ds?))"
170 170
r"|"
171 171
r"(?:\d(?:\-?[A-Z])+)"
172 172
r")"

src/textacy/preprocessing/resources.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -68,9 +68,9 @@ def get_text(self) -> str:
68 68
r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))"
69 69
r"|"
70 70
# host name
71-
r"(?:(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)"
71+
r"(?:(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9])"
72 72
# domain name
73-
r"(?:\.(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)*"
73+
r"(?:\.(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9])*"
74 74
# TLD identifier
75 75
r"(?:\.(?:[a-z\u00a1-\uffff]{2,}))"
76 76
r")"

0 commit comments

Comments
 (0)