mirror of
https://github.com/huggingface/transformers.git
synced 2025-07-31 02:02:21 +06:00
🚨🚨🚨 Limit backtracking in Nougat regexp (#35264)
* Limit backtracking in regexp
* Update
* [run-slow] nougat
* Update
This commit is contained in:
parent
d29a06e39a
commit
deac971c46
@@ -514,7 +514,7 @@ class NougatTokenizerFast(PreTrainedTokenizerFast):
|
||||
generation = generation.replace("\n* [leftmargin=*]\n", "\n")
|
||||
# Remove lines with markdown headings starting with #, with numerals,
|
||||
# and possibly roman numerals with trailing spaces and newlines
|
||||
-generation = re.sub(r"^#+ (?:\.?(?:\d|[ixv])+)*\s*(?:$|\n\s*)", "", generation, flags=re.M)
|
||||
+generation = re.sub(r"^#+ (?:[\d+\.]+|[ixv\.]+)?\s*(?:$|\n\s*)", "", generation, flags=re.M)
|
||||
# most likely hallucinated titles
|
||||
lines = generation.split("\n")
|
||||
if lines[-1].startswith("#") and lines[-1].lstrip("#").startswith(" ") and len(lines) > 1:
|
||||
|
Loading…
Reference in New Issue
Block a user