From 568b3ffe5e057d17fdebf0ec05a97e01029b7068 Mon Sep 17 00:00:00 2001 From: gareth-cross Date: Mon, 30 Dec 2024 17:03:16 -0800 Subject: [PATCH 1/2] Fix handling of docstrings with tokenization errors --- mypy/stubdoc.py | 24 ++++++++++++------ mypy/test/teststubgen.py | 53 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 8 deletions(-) diff --git a/mypy/stubdoc.py b/mypy/stubdoc.py index e99204f3ade53..af555b7a0c899 100644 --- a/mypy/stubdoc.py +++ b/mypy/stubdoc.py @@ -347,14 +347,22 @@ def infer_sig_from_docstring(docstr: str | None, name: str) -> list[FunctionSig] return None state = DocStringParser(name) - # Return all found signatures, even if there is a parse error after some are found. - with contextlib.suppress(tokenize.TokenError): - try: - tokens = tokenize.tokenize(io.BytesIO(docstr.encode("utf-8")).readline) - for token in tokens: - state.add_token(token) - except IndentationError: - return None + + # Keep tokenizing after an error. If `TokenError` is encountered, tokenize() will + # stop. We check the remaining bytes in bytes_io and resume tokenizing on the next + # loop iteration. + encoded_docstr = docstr.encode("utf-8") + bytes_io = io.BytesIO(encoded_docstr) + while bytes_io.tell() < len(encoded_docstr): + # Return all found signatures, even if there is a parse error after some are found. 
+ with contextlib.suppress(tokenize.TokenError): + try: + tokens = tokenize.tokenize(bytes_io.readline) + for token in tokens: + state.add_token(token) + except IndentationError: + return None + sigs = state.get_signatures() def is_unique_args(sig: FunctionSig) -> bool: diff --git a/mypy/test/teststubgen.py b/mypy/test/teststubgen.py index dffa1aa80c5db..21821d141c7a8 100644 --- a/mypy/test/teststubgen.py +++ b/mypy/test/teststubgen.py @@ -429,6 +429,59 @@ def test_infer_sig_from_docstring_square_brackets(self) -> None: == [] ) + def test_infer_sig_from_docstring_unterminated_string_literal(self) -> None: + docstring = """ + func(*args, **kwargs) + Overloaded function. + + 1. func(x: int) -> None + + This is a valid docstring with an "unterminated string literal. + + 2. func(x: int, y: int) -> str + + This is an overloaded method. + """ + sigs = infer_sig_from_docstring(docstring, name="func") + assert_equal( + sigs[0], FunctionSig(name="func", args=[ArgSig(name="x", type="int")], ret_type="None") + ) + assert_equal( + sigs[1], + FunctionSig( + name="func", + args=[ArgSig(name="x", type="int"), ArgSig(name="y", type="int")], + ret_type="str", + ), + ) + + def test_infer_sig_from_docstring_latex(self) -> None: + docstring = """ + func(*args, **kwargs) + Overloaded function. + + 1. func(x: int) -> None + + .. math:: + \\mathbf{f}\\left(x\\right) = \\pi \\cdot x + + 2. func(x: int, y: int) -> str + + This is an overloaded method. 
+ """ + sigs = infer_sig_from_docstring(docstring, name="func") + assert_equal( + sigs[0], FunctionSig(name="func", args=[ArgSig(name="x", type="int")], ret_type="None") + ) + assert_equal( + sigs[1], + FunctionSig( + name="func", + args=[ArgSig(name="x", type="int"), ArgSig(name="y", type="int")], + ret_type="str", + ), + ) + def test_remove_misplaced_type_comments_1(self) -> None: good = """ \u1234 From 8cdb58c5b9d5472484d709f90193eeade0fbebf6 Mon Sep 17 00:00:00 2001 From: gareth-cross Date: Thu, 30 Jan 2025 19:57:52 -0800 Subject: [PATCH 2/2] Fix type checking error in teststubgen.py --- mypy/test/teststubgen.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mypy/test/teststubgen.py b/mypy/test/teststubgen.py index 21821d141c7a8..43c844856f9bd 100644 --- a/mypy/test/teststubgen.py +++ b/mypy/test/teststubgen.py @@ -443,6 +443,7 @@ def test_infer_sig_from_docstring_unterminated_string_literal(self) -> None: This is an overloaded method. """ sigs = infer_sig_from_docstring(docstring, name="func") + assert sigs is not None, "Expected two signatures" assert_equal( sigs[0], FunctionSig(name="func", args=[ArgSig(name="x", type="int")], ret_type="None") ) @@ -470,6 +471,7 @@ def test_infer_sig_from_docstring_latex(self) -> None: This is an overloaded method. """ sigs = infer_sig_from_docstring(docstring, name="func") + assert sigs is not None, "Expected two signatures" assert_equal( sigs[0], FunctionSig(name="func", args=[ArgSig(name="x", type="int")], ret_type="None") )