Migrate tests from unittest to pure pytest. (#238)

scrapy · Feb 16, 2025 · cc15435 · cc15435
1 parent 6caf279
commit cc15435
Show file tree

Hide file tree

Showing 7 changed files with 777 additions and 871 deletions.
diff --git a/.gitignore b/.gitignore
@@ -14,3 +14,4 @@ coverage.xml
 /index.txt
 .dmypy.json
 .hypothesis/
+.idea/
diff --git a/pyproject.toml b/pyproject.toml
@@ -15,7 +15,6 @@ filename = "docs/conf.py"
 
 [tool.coverage.run]
 branch = true
-include = ["w3lib/*"]
 
 [tool.coverage.report]
 exclude_also = [
@@ -54,6 +53,7 @@ disable = [
     "raise-missing-from",
     "redefined-builtin",
     "redefined-outer-name",
+    "too-few-public-methods",
     "too-many-arguments",
     "too-many-branches",
     "too-many-lines",
@@ -91,6 +91,8 @@ extend-select = [
     "PIE",
     # pylint
     "PL",
+    # flake8-pytest-style
+    "PT",
     # flake8-use-pathlib
     "PTH",
     # flake8-pyi

diff --git a/tests/test_encoding.py b/tests/test_encoding.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import codecs
-import unittest
 from typing import Any
 
 from w3lib.encoding import (
@@ -14,7 +13,7 @@
 )
 
 
-class RequestEncodingTests(unittest.TestCase):
+class TestRequestEncoding:
     utf8_fragments = [
         # Content-Type as meta http-equiv
         b"""<meta http-equiv="content-type" content="text/html;charset=UTF-8" />""",
@@ -44,84 +43,84 @@ def test_bom(self):
             assert bom_encoding is not None
             assert bom is not None
             decoded = string[len(bom) :].decode(bom_encoding)
-            self.assertEqual(water_unicode, decoded)
+            assert water_unicode == decoded
         # Body without BOM
         enc, bom = read_bom(b"foo")
-        self.assertEqual(enc, None)
-        self.assertEqual(bom, None)
+        assert enc is None
+        assert bom is None
         # Empty body
         enc, bom = read_bom(b"")
-        self.assertEqual(enc, None)
-        self.assertEqual(bom, None)
+        assert enc is None
+        assert bom is None
 
     def test_http_encoding_header(self):
         header_value = "Content-Type: text/html; charset=ISO-8859-4"
         extracted = http_content_type_encoding(header_value)
-        self.assertEqual(extracted, "iso8859-4")
-        self.assertEqual(None, http_content_type_encoding("something else"))
+        assert extracted == "iso8859-4"
+        assert http_content_type_encoding("something else") is None
 
     def test_html_body_declared_encoding(self):
         for fragment in self.utf8_fragments:
             encoding = html_body_declared_encoding(fragment)
-            self.assertEqual(encoding, "utf-8", fragment)
-        self.assertEqual(None, html_body_declared_encoding(b"something else"))
-        self.assertEqual(
-            None,
+            assert encoding == "utf-8", fragment
+        assert None is html_body_declared_encoding(b"something else")
+        assert (
             html_body_declared_encoding(
                 b"""
             <head></head><body>
             this isn't searched
             <meta charset="utf-8">
         """
-            ),
+            )
+            is None
         )
-        self.assertEqual(
-            None,
+        assert (
             html_body_declared_encoding(
                 b"""<meta http-equiv="Fake-Content-Type-Header" content="text/html; charset=utf-8">"""
-            ),
+            )
+            is None
         )
 
     def test_html_body_declared_encoding_unicode(self):
         # html_body_declared_encoding should work when unicode body is passed
-        self.assertEqual(None, html_body_declared_encoding("something else"))
+        assert html_body_declared_encoding("something else") is None
 
         for fragment in self.utf8_fragments:
             encoding = html_body_declared_encoding(fragment.decode("utf8"))
-            self.assertEqual(encoding, "utf-8", fragment)
+            assert encoding == "utf-8", fragment
 
-        self.assertEqual(
-            None,
+        assert (
             html_body_declared_encoding(
                 """
             <head></head><body>
             this isn't searched
             <meta charset="utf-8">
         """
-            ),
+            )
+            is None
         )
-        self.assertEqual(
-            None,
+        assert (
             html_body_declared_encoding(
                 """<meta http-equiv="Fake-Content-Type-Header" content="text/html; charset=utf-8">"""
-            ),
+            )
+            is None
         )
 
 
-class CodecsEncodingTestCase(unittest.TestCase):
+class TestCodecsEncoding:
     def test_resolve_encoding(self):
-        self.assertEqual(resolve_encoding("latin1"), "cp1252")
-        self.assertEqual(resolve_encoding(" Latin-1"), "cp1252")
-        self.assertEqual(resolve_encoding("gb_2312-80"), "gb18030")
-        self.assertEqual(resolve_encoding("unknown encoding"), None)
+        assert resolve_encoding("latin1") == "cp1252"
+        assert resolve_encoding(" Latin-1") == "cp1252"
+        assert resolve_encoding("gb_2312-80") == "gb18030"
+        assert resolve_encoding("unknown encoding") is None
 
 
-class UnicodeDecodingTestCase(unittest.TestCase):
+class TestUnicodeDecoding:
     def test_utf8(self):
-        self.assertEqual(to_unicode(b"\xc2\xa3", "utf-8"), "\xa3")
+        assert to_unicode(b"\xc2\xa3", "utf-8") == "\xa3"
 
     def test_invalid_utf8(self):
-        self.assertEqual(to_unicode(b"\xc2\xc2\xa3", "utf-8"), "\ufffd\xa3")
+        assert to_unicode(b"\xc2\xc2\xa3", "utf-8") == "\ufffd\xa3"
 
 
 def ct(charset: str | None) -> str | None:
@@ -132,14 +131,14 @@ def norm_encoding(enc: str) -> str:
     return codecs.lookup(enc).name
 
 
-class HtmlConversionTests(unittest.TestCase):
+class TestHtmlConversion:
     def test_unicode_body(self):
         unicode_string = "\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0447\u0435\u0441\u043a\u0438\u0439 \u0442\u0435\u043a\u0441\u0442"
         original_string = unicode_string.encode("cp1251")
         encoding, body_unicode = html_to_unicode(ct("cp1251"), original_string)
         # check body_as_unicode
-        self.assertTrue(isinstance(body_unicode, str))
-        self.assertEqual(body_unicode, unicode_string)
+        assert isinstance(body_unicode, str)
+        assert body_unicode == unicode_string
 
     def _assert_encoding(
         self,
@@ -150,15 +149,14 @@ def _assert_encoding(
     ) -> None:
         assert not isinstance(body, str)
         encoding, body_unicode = html_to_unicode(ct(content_type), body)
-        self.assertTrue(isinstance(body_unicode, str))
-        self.assertEqual(norm_encoding(encoding), norm_encoding(expected_encoding))
+        assert isinstance(body_unicode, str)
+        assert norm_encoding(encoding) == norm_encoding(expected_encoding)
 
         if isinstance(expected_unicode, str):
-            self.assertEqual(body_unicode, expected_unicode)
+            assert body_unicode == expected_unicode
         else:
-            self.assertTrue(
-                body_unicode in expected_unicode,
-                f"{body_unicode} is not in {expected_unicode}",
+            assert body_unicode in expected_unicode, (
+                f"{body_unicode} is not in {expected_unicode}"
             )
 
     def test_content_type_and_conversion(self):
@@ -227,8 +225,8 @@ def _assert_encoding_detected(
     ) -> None:
         assert not isinstance(body, str)
         encoding, body_unicode = html_to_unicode(ct(content_type), body, **kwargs)
-        self.assertTrue(isinstance(body_unicode, str))
-        self.assertEqual(norm_encoding(encoding), norm_encoding(expected_encoding))
+        assert isinstance(body_unicode, str)
+        assert norm_encoding(encoding) == norm_encoding(expected_encoding)
 
     def test_BOM(self):
         # utf-16 cases already tested, as is the BOM detection function