Fix handling of docstrings with tokenization errors (Fixes #18388) #18575

Open · wants to merge 2 commits into master
24 changes: 16 additions & 8 deletions mypy/stubdoc.py
@@ -347,14 +347,22 @@ def infer_sig_from_docstring(docstr: str | None, name: str) -> list[FunctionSig]
         return None
 
     state = DocStringParser(name)
-    # Return all found signatures, even if there is a parse error after some are found.
-    with contextlib.suppress(tokenize.TokenError):
-        try:
-            tokens = tokenize.tokenize(io.BytesIO(docstr.encode("utf-8")).readline)
-            for token in tokens:
-                state.add_token(token)
-        except IndentationError:
-            return None
+
+    # Keep tokenizing after an error. If `TokenError` is encountered, tokenize() will
+    # stop. We check the remaining bytes in bytes_io and resume tokenizing on the next
+    # loop iteration.
+    encoded_docstr = docstr.encode("utf-8")
+    bytes_io = io.BytesIO(encoded_docstr)
+    while bytes_io.tell() < len(encoded_docstr):
+        # Return all found signatures, even if there is a parse error after some are found.
+        with contextlib.suppress(tokenize.TokenError):
+            try:
+                tokens = tokenize.tokenize(bytes_io.readline)
+                for token in tokens:
+                    state.add_token(token)
+            except IndentationError:
+                return None
+
     sigs = state.get_signatures()
 
     def is_unique_args(sig: FunctionSig) -> bool:
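For context, here is a minimal standalone sketch (not part of this PR) of the resume pattern used above: tokenize.tokenize() pulls lines through the readline callable it is given, so after a TokenError the BytesIO position records how much input was consumed, and a fresh tokenize.tokenize() call on the same buffer picks up at the next unread line. On Python versions where the stray quote below only yields an ERRORTOKEN rather than raising, the except branch is simply never hit.

import io
import tokenize

# Minimal sketch of the resume-after-TokenError pattern (illustration only).
source = b'first = "unterminated\nsecond = 2\n'
buf = io.BytesIO(source)

while buf.tell() < len(source):
    try:
        for tok in tokenize.tokenize(buf.readline):
            if tok.type == tokenize.NAME:
                print("saw name:", tok.string)
    except tokenize.TokenError as exc:
        # tokenize() gives up here; if unread bytes remain in buf, the next
        # loop iteration resumes tokenizing from the following line.
        print("TokenError:", exc)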
55 changes: 55 additions & 0 deletions mypy/test/teststubgen.py
@@ -429,6 +429,61 @@ def test_infer_sig_from_docstring_square_brackets(self) -> None:
             == []
         )
 
+    def test_infer_sig_from_docstring_unterminated_string_literal(self) -> None:
+        docstring = """
+        func(*args, **kwargs)
+        Overloaded function.
+
+        1. func(x: int) -> None
+
+        This is a valid docstring with an "unterminated string literal.
+
+        2. func(x: int, y: int) -> str
+
+        This is an overloaded method.
+        """
+        sigs = infer_sig_from_docstring(docstring, name="func")
+        assert sigs is not None, "Expected two signatures"
+        assert_equal(
+            sigs[0], FunctionSig(name="func", args=[ArgSig(name="x", type="int")], ret_type="None")
+        )
+        assert_equal(
+            sigs[1],
+            FunctionSig(
+                name="func",
+                args=[ArgSig(name="x", type="int"), ArgSig(name="y", type="int")],
+                ret_type="str",
+            ),
+        )
+
+    def test_infer_sig_from_docstring_latex(self) -> None:
+        docstring = """
+        func(*args, **kwargs)
+        Overloaded function.
+
+        1. func(x: int) -> None
+
+        .. math::
+            \\mathbf{f}\\left(x\\right) = \\pi \\cdot x
+
+        2. func(x: int, y: int) -> str
+
+        This is an overloaded method.
+        """
+        sigs = infer_sig_from_docstring(docstring, name="func")
+        assert sigs is not None, "Expected two signatures"
+        assert_equal(
+            sigs[0], FunctionSig(name="func", args=[ArgSig(name="x", type="int")], ret_type="None")
+        )
+        assert_equal(
+            sigs[1],
+            FunctionSig(
+                name="func",
+                args=[ArgSig(name="x", type="int"), ArgSig(name="y", type="int")],
+                ret_type="str",
+            ),
+        )
+
     def test_remove_misplaced_type_comments_1(self) -> None:
         good = """
         \u1234
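As an illustration only (not part of the PR), the fixed helper can also be exercised directly on a docstring shaped like the new test cases; whether both signatures come back on a given interpreter depends on how its tokenizer reports the stray quote.

# Illustrative only: mirrors the new unterminated-string test case.
from mypy.stubdoc import infer_sig_from_docstring

docstring = (
    "func(x: int) -> None\n"
    '\nProse with an "unterminated string literal.\n'
    "\nfunc(x: int, y: int) -> str\n"
)
# With the resume loop in place, both signatures are expected to be inferred even
# on Pythons where the stray quote raises tokenize.TokenError.
for sig in infer_sig_from_docstring(docstring, name="func") or []:
    print(sig.name, [arg.name for arg in sig.args], "->", sig.ret_type)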