From 8450791c5099149a245dbb9afa16e13035a72590 Mon Sep 17 00:00:00 2001
From: Andrey Rakhmatullin
Date: Sat, 22 Feb 2025 21:02:23 +0500
Subject: [PATCH] Migrate to ruff.

---
 .bumpversion.cfg                   |   7 --
 .coveragerc                        |   4 -
 .flake8                            |  15 ---
 .pre-commit-config.yaml            |  25 ++---
 pyproject.toml                     | 145 ++++++++++++++++++++++++++++-
 setup.cfg                          |   8 --
 setup.py                           |   5 +-
 src/protego.py                     |  64 ++++++-------
 tests/fetch_robotstxt.py           |  21 +++--
 tests/test_on_fetched_robotstxt.py |  28 +++---
 tests/test_on_google_spec.py       |  12 +--
 tests/test_protego.py              | 128 +++++++++----------
 tests/test_unquote.py              |  37 ++++----
 tox.ini                            |   2 -
 14 files changed, 266 insertions(+), 235 deletions(-)
 delete mode 100644 .bumpversion.cfg
 delete mode 100644 .coveragerc
 delete mode 100644 .flake8
 delete mode 100644 setup.cfg

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
deleted file mode 100644
index 72dffe4..0000000
--- a/.bumpversion.cfg
+++ /dev/null
@@ -1,7 +0,0 @@
-[bumpversion]
-current_version = 0.4.0
-commit = True
-tag = True
-tag_name = {new_version}
-
-[bumpversion:file:setup.py]
diff --git a/.coveragerc b/.coveragerc
deleted file mode 100644
index 584a9e7..0000000
--- a/.coveragerc
+++ /dev/null
@@ -1,4 +0,0 @@
-[run]
-branch = true
-include = src/*
-omit = tests/*
diff --git a/.flake8 b/.flake8
deleted file mode 100644
index dbfa735..0000000
--- a/.flake8
+++ /dev/null
@@ -1,15 +0,0 @@
-[flake8]
-ignore =
-    # Refers to the max-line length. Let's suppress the error and simply
-    # let black take care on how it wants to format the lines.
-    E501,
-
-    # Refers to "line break before binary operator".
-    # Similar to above, let black take care of the formatting.
-    W503,
-
-    # Refers to "Unnecessary dict call - rewrite as a literal".
-    C408
-per-file-ignores =
-    # Ignore: "Found for loop that reassigns the iterable it is iterating with each iterable value"
-    src/protego.py:430:21:B020
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5398be8..d3e17be 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,20 +1,7 @@
 repos:
-  - hooks:
-      - id: black
-    repo: https://github.com/psf/black
-    rev: 24.10.0
-  - hooks:
-      - id: isort
-        language_version: python3
-    repo: https://github.com/timothycrosley/isort
-    rev: 5.13.2
-  - hooks:
-      - id: flake8
-        language_version: python3
-        additional_dependencies:
-          - flake8-bugbear
-          - flake8-comprehensions
-          - flake8-debugger
-          - flake8-string-format
-    repo: https://github.com/pycqa/flake8
-    rev: 7.1.1
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  rev: v0.9.7
+  hooks:
+  - id: ruff
+    args: [ --fix ]
+  - id: ruff-format
diff --git a/pyproject.toml b/pyproject.toml
index aa7e766..6f39f9d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,142 @@
-[tool.isort]
-profile = "black"
-multi_line_output = 3
+[tool.bumpversion]
+current_version = "0.4.0"
+commit = true
+tag = true
+tag_name = "{new_version}"
+
+[[tool.bumpversion.files]]
+filename = "setup.py"
+
+[tool.coverage.run]
+branch = true
+
+[tool.coverage.report]
+exclude_also = [
+    "if TYPE_CHECKING:",
+]
+
+[tool.ruff.lint]
+extend-select = [
+    # flake8-bugbear
+    "B",
+    # flake8-comprehensions
+    "C4",
+    # pydocstyle
+    "D",
+    # flake8-future-annotations
+    "FA",
+    # flynt
+    "FLY",
+    # refurb
+    "FURB",
+    # isort
+    "I",
+    # flake8-implicit-str-concat
+    "ISC",
+    # flake8-logging
+    "LOG",
+    # Perflint
+    "PERF",
+    # pygrep-hooks
+    "PGH",
+    # flake8-pie
+    "PIE",
+    # pylint
+    "PL",
+    # flake8-use-pathlib
+    "PTH",
+    # flake8-pyi
+    "PYI",
+    # flake8-quotes
+    "Q",
+    # flake8-return
+    "RET",
+    # flake8-raise
+    "RSE",
+    # Ruff-specific rules
+    "RUF",
+    # flake8-bandit
+    "S",
+    # flake8-simplify
+    "SIM",
+    # flake8-slots
+    "SLOT",
+    # flake8-debugger
+    "T10",
+    # flake8-type-checking
+    "TC",
+    # pyupgrade
+    "UP",
+    # pycodestyle warnings
+    "W",
+    # flake8-2020
+    "YTT",
+]
+ignore = [
+    # Missing docstring in public module
+    "D100",
+    # Missing docstring in public class
+    "D101",
+    # Missing docstring in public method
+    "D102",
+    # Missing docstring in public function
+    "D103",
+    # Missing docstring in public package
+    "D104",
+    # Missing docstring in magic method
+    "D105",
+    # Missing docstring in public nested class
+    "D106",
+    # Missing docstring in __init__
+    "D107",
+    # One-line docstring should fit on one line with quotes
+    "D200",
+    # No blank lines allowed after function docstring
+    "D202",
+    # 1 blank line required between summary line and description
+    "D205",
+    # Multi-line docstring closing quotes should be on a separate line
+    "D209",
+    # First line should end with a period
+    "D400",
+    # First line should be in imperative mood; try rephrasing
+    "D401",
+    # First line should not be the function's "signature"
+    "D402",
+    # First word of the first line should be properly capitalized
+    "D403",
+    # `try`-`except` within a loop incurs performance overhead
+    "PERF203",
+    # Too many return statements
+    "PLR0911",
+    # Too many branches
+    "PLR0912",
+    # Too many arguments in function definition
+    "PLR0913",
+    # Too many statements
+    "PLR0915",
+    # Magic value used in comparison
+    "PLR2004",
+    # `for` loop variable `line` overwritten by assignment target
+    "PLW2901",
+    # String contains ambiguous {}.
+    "RUF001",
+    # Docstring contains ambiguous {}.
+    "RUF002",
+    # Comment contains ambiguous {}.
+    "RUF003",
+    # Mutable class attributes should be annotated with `typing.ClassVar`
+    "RUF012",
+    # Use of `assert` detected
+    "S101",
+
+    # to be done when adding type hints
+    # Use `typing.NamedTuple` instead of `collections.namedtuple`
+    "PYI024",
+]
+
+[tool.ruff.lint.pydocstyle]
+convention = "pep257"
+
+[tool.ruff.lint.per-file-ignores]
+"tests/*" = ["S"]
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index 0f6b27e..0000000
--- a/setup.cfg
+++ /dev/null
@@ -1,8 +0,0 @@
-[bdist_wheel]
-universal = 1
-
-[metadata]
-description_file = README.rst
-
-[aliases]
-test=pytest tests
diff --git a/setup.py b/setup.py
index b5604bc..d410276 100755
--- a/setup.py
+++ b/setup.py
@@ -1,11 +1,12 @@
-#!/usr/bin/env python
+from pathlib import Path
+
 from setuptools import find_packages, setup
 
 setup(
     name="Protego",
     version="0.4.0",
     description="Pure-Python robots.txt parser with support for modern conventions",
-    long_description=open("README.rst").read(),
+    long_description=Path("README.rst").read_text(encoding="utf-8"),
     long_description_content_type="text/x-rst",
     url="https://github.com/scrapy/protego",
     author="Anubhav Patel",
diff --git a/src/protego.py b/src/protego.py
index 0d330f0..b2969fb 100644
--- a/src/protego.py
+++ b/src/protego.py
@@ -32,7 +32,7 @@
 _HEX_DIGITS = set("0123456789ABCDEFabcdef")
 
 
-__all__ = ["RequestRate", "Protego"]
+__all__ = ["Protego", "RequestRate"]
 
 
 def _is_valid_directive_field(field):
@@ -49,7 +49,7 @@ def _is_valid_directive_field(field):
     )
 
 
-class _URLPattern(object):
+class _URLPattern:
     """Internal class which represents a URL pattern."""
 
     def __init__(self, pattern):
@@ -96,11 +96,10 @@ def _prepare_pattern_for_regex(self, pattern):
                 s[index] = re.escape(substr)
             elif s[index] == "*":
                 s[index] = ".*?"
-        pattern = "".join(s)
-        return pattern
+        return "".join(s)
 
 
-class _RuleSet(object):
+class _RuleSet:
     """Internal class which stores rules for a user agent."""
 
     def __init__(self, parser_instance):
@@ -131,23 +130,21 @@ def hex_to_byte(h):
 
         # ignore contains %xy escapes for characters that are not
         # meant to be converted back.
-        ignore = {"{ord_c:02X}".format(ord_c=ord(c)) for c in ignore}
+        ignore = {f"{ord(c):02X}" for c in ignore}
         parts = url.split("%")
         parts[0] = parts[0].encode("utf-8")
 
         for i in range(1, len(parts)):
-            if len(parts[i]) >= 2:
-                # %xy is a valid escape only if x and y are hexadecimal digits.
-                if set(parts[i][:2]).issubset(_HEX_DIGITS):
-                    # make sure that all %xy escapes are in uppercase.
-                    hexcode = parts[i][:2].upper()
-                    leftover = parts[i][2:]
-                    if hexcode not in ignore:
-                        parts[i] = hex_to_byte(hexcode) + leftover.encode("utf-8")
-                        continue
-                    else:
-                        parts[i] = hexcode + leftover
+            # %xy is a valid escape only if x and y are hexadecimal digits.
+            if len(parts[i]) >= 2 and set(parts[i][:2]).issubset(_HEX_DIGITS):
+                # make sure that all %xy escapes are in uppercase.
+                hexcode = parts[i][:2].upper()
+                leftover = parts[i][2:]
+                if hexcode not in ignore:
+                    parts[i] = hex_to_byte(hexcode) + leftover.encode("utf-8")
+                    continue
+                parts[i] = hexcode + leftover
 
             # add back the '%' we removed during splitting.
             parts[i] = b"%" + parts[i].encode("utf-8")
 
@@ -158,8 +155,8 @@ def hexescape(self, char):
         """Escape char as RFC 2396 specifies"""
         hex_repr = hex(ord(char))[2:].upper()
         if len(hex_repr) == 1:
-            hex_repr = "0%s" % hex_repr
-        return "%" + hex_repr
+            hex_repr = f"0{hex_repr}"
+        return f"%{hex_repr}"
 
     def _quote_path(self, path):
         """Return percent encoded path."""
@@ -172,7 +169,7 @@ def _quote_path(self, path):
         return path or "/"
 
     def _quote_pattern(self, pattern):
-        if pattern.startswith("https://") or pattern.startswith("http://"):
+        if pattern.startswith(("https://", "http://")):
             pattern = "/" + pattern
         if pattern.startswith("//"):
             pattern = "//" + pattern
@@ -191,8 +188,7 @@ def _quote_pattern(self, pattern):
         parts = ParseResult(
             "", "", pattern + last_char, parts.params, parts.query, parts.fragment
         )
-        pattern = urlunparse(parts)
-        return pattern
+        return urlunparse(parts)
 
     def allow(self, pattern):
         if "$" in pattern:
@@ -244,10 +240,8 @@ def crawl_delay(self, delay):
         except ValueError:
             # Value is malformed, do nothing.
             logger.debug(
-                "Malformed rule at line {line_seen} : cannot set crawl delay to '{delay}'. "
-                "Ignoring this rule.".format(
-                    line_seen=self._parser_instance._total_line_seen, delay=delay
-                )
+                f"Malformed rule at line {self._parser_instance._total_line_seen} : "
+                f"cannot set crawl delay to '{delay}'. Ignoring this rule."
             )
             return
 
@@ -285,10 +279,8 @@ def request_rate(self, value):
         except Exception:
             # Value is malformed, do nothing.
             logger.debug(
-                "Malformed rule at line {line_seen} : cannot set request rate using '{value}'. "
-                "Ignoring this rule.".format(
-                    line_seen=self._parser_instance._total_line_seen, value=value
-                )
+                f"Malformed rule at line {self._parser_instance._total_line_seen} : "
+                f"cannot set request rate using '{value}'. Ignoring this rule."
             )
             return
 
@@ -312,16 +304,14 @@ def visit_time(self, value):
             start_time, end_time = self._parse_time_period(value, separator=" ")
         except Exception:
             logger.debug(
-                "Malformed rule at line {line_seen} : cannot set visit time using '{value}'. "
-                "Ignoring this rule.".format(
-                    line_seen=self._parser_instance._total_line_seen, value=value
-                )
+                f"Malformed rule at line {self._parser_instance._total_line_seen} : "
+                f"cannot set visit time using '{value}'. Ignoring this rule."
             )
             return
         self._visit_time = VisitTime(start_time, end_time)
 
 
-class Protego(object):
+class Protego:
     def __init__(self):
         # A dict mapping user agents (specified in robots.txt) to rule sets.
         self._user_agents = {}
@@ -403,9 +393,7 @@ def _parse_robotstxt(self, content):
                 and field not in _SITEMAP_DIRECTIVE
             ):
                 logger.debug(
-                    "Rule at line {line_seen} without any user agent to enforce it on.".format(
-                        line_seen=self._total_line_seen
-                    )
+                    f"Rule at line {self._total_line_seen} without any user agent to enforce it on."
                 )
                 continue
 
diff --git a/tests/fetch_robotstxt.py b/tests/fetch_robotstxt.py
index b5dddf9..adda1d8 100644
--- a/tests/fetch_robotstxt.py
+++ b/tests/fetch_robotstxt.py
@@ -2,12 +2,12 @@
 Usage
 -----
 
->>> python fetch_robotstxt.py -l top-10000-websites.txt -d test_data
+$ python fetch_robotstxt.py -l top-10000-websites.txt -d test_data
 """
 
 import argparse
-import os
 import sys
+from pathlib import Path
 from urllib.parse import ParseResult, urlparse, urlunparse
 
 import scrapy
@@ -20,6 +20,7 @@
     action="append",
     dest="websites",
    help="Adds to the list of websites.",
+    type=Path,
 )
 parser.add_argument(
     "-d",
@@ -27,6 +28,7 @@
     action="store",
     dest="directory",
     help="Directory to save robots.txt files.",
+    type=Path,
 )
 
 args = parser.parse_args()
@@ -39,23 +41,22 @@ class RobotstxtSpider(scrapy.Spider):
     name = "robotstxt_spider"
 
     def start_requests(self):
+        w: Path
         for w in args.websites:
-            if os.path.isfile(w):
-                with open(w, "r") as f:
+            if w.is_file():
+                with w.open() as f:
                     for domain in f:
-                        domain = domain.strip()
                         yield scrapy.Request(
-                            url="https://{}/robots.txt".format(domain),
+                            url=f"https://{domain.strip()}/robots.txt",
                             callback=self.parse,
                             errback=self.err_cb,
                         )
 
     def parse(self, response):
         filename = urlparse(response.url).netloc
-        if not os.path.exists(args.directory):
-            os.mkdir(args.directory)
-        with open(os.path.join(args.directory, filename), "wb") as f:
-            f.write(response.body)
+        if not args.directory.exists():
+            args.directory.mkdir()
+        (args.directory / filename).write_bytes(response.body)
 
     def err_cb(self, failure):
         request = failure.request
diff --git a/tests/test_on_fetched_robotstxt.py b/tests/test_on_fetched_robotstxt.py
index c03caac..ec3ea68 100644
--- a/tests/test_on_fetched_robotstxt.py
+++ b/tests/test_on_fetched_robotstxt.py
@@ -1,29 +1,23 @@
-from os import listdir
-from os.path import abspath, dirname, isfile, join
+from pathlib import Path
 
 import pytest
 
 from protego import Protego
 
-test_data_directory = join(dirname(abspath(__file__)), "test_data")
-robotstxts = [
-    f for f in listdir(test_data_directory) if isfile(join(test_data_directory, f))
-]
+test_data_directory = Path(__file__).parent / "test_data"
+robotstxts = [f for f in test_data_directory.iterdir() if f.is_file()]
 
 
 @pytest.mark.parametrize("path_to_robotstxt", robotstxts)
 def test_no_exceptions(path_to_robotstxt):
     try:
-        with open(join(test_data_directory, path_to_robotstxt), "rb") as f:
-            try:
-                content = f.read().decode("utf-8")
-            except UnicodeDecodeError:
-                # Downloaded robots.txt is malformed, ignore this
-                return
-            Protego.parse(content=content)
+        try:
+            content = path_to_robotstxt.read_bytes().decode("utf-8")
+        except UnicodeDecodeError:
+            # Downloaded robots.txt is malformed, ignore this
+            return
+        Protego.parse(content=content)
     except Exception as e:
         raise AssertionError(
-            "{error}. Exception raised while parsing {robots}".format(
-                error=e, robots=join(path_to_robotstxt, "robots.txt")
-            )
-        )
+            f"{e}. Exception raised while parsing http://{path_to_robotstxt.name}/robots.txt"
+        ) from e
diff --git a/tests/test_on_google_spec.py b/tests/test_on_google_spec.py
index 56e69e8..3e6a095 100644
--- a/tests/test_on_google_spec.py
+++ b/tests/test_on_google_spec.py
@@ -90,12 +90,10 @@ def test_user_agent_precedence(path, user_agent):
     ],
 )
 def test_path_matching(pattern, path, match):
-    content = """
+    content = f"""
     User-Agent: *
     disallow: {pattern}
-    """.format(
-        pattern=pattern
-    )
+    """
     rp = Protego.parse(content)
     assert (not rp.can_fetch(path, "*")) == match
 
@@ -110,11 +108,9 @@ def test_record_precedence(rules, url, allowed):
     ],
 )
 def test_record_precedence(rules, url, allowed):
-    content = """
+    content = f"""
    User-Agent: *
     {rules}
-    """.format(
-        rules=rules
-    )
+    """
     rp = Protego.parse(content)
     assert rp.can_fetch(url, "*") == allowed
diff --git a/tests/test_protego.py b/tests/test_protego.py
index 67337a8..c4cbf82 100644
--- a/tests/test_protego.py
+++ b/tests/test_protego.py
@@ -9,16 +9,13 @@ class TestProtego(TestCase):
     def test_allowed(self):
         content = (
-            "User-agent: * \n"
-            "Disallow: /disallowed \n"
-            "Allow: /allowed \n"
-            "Crawl-delay: 10"
+            "User-agent: * \nDisallow: /disallowed \nAllow: /allowed \nCrawl-delay: 10"
         )
         rp = Protego.parse(content=content)
         self.assertTrue(rp.can_fetch("https://www.site.local/allowed", "*"))
         self.assertFalse(rp.can_fetch("https://www.site.local/disallowed", "*"))
 
-        content = "User-agent: * \n" "Disallow: /d \n" "Crawl-delay: 10"
+        content = "User-agent: * \nDisallow: /d \nCrawl-delay: 10"
         rp = Protego.parse(content=content)
         self.assertTrue(rp.can_fetch("https://www.site.local/abc/d", "*"))
         self.assertFalse(rp.can_fetch("https://www.site.local/disallowed", "*"))
 
@@ -42,105 +39,101 @@ def test_malformed_disallow(self):
         self.assertFalse(rp.can_fetch("https://www.site.local/six", "*"))
 
     def test_length_based_precedence(self):
-        content = "User-agent: * \n" "Disallow: / \n" "Allow: /page"
+        content = "User-agent: * \nDisallow: / \nAllow: /page"
         rp = Protego.parse(content=content)
         self.assertTrue(rp.can_fetch("https://www.site.local/page", "*"))
         self.assertFalse(rp.can_fetch("https://www.site.local/elsewhere", "*"))
 
-        content = "user-agent: FooBot\n" "disallow: /x/page.html\n" "allow: /x/\n"
+        content = "user-agent: FooBot\ndisallow: /x/page.html\nallow: /x/\n"
         rp = Protego.parse(content=content)
         self.assertFalse(rp.can_fetch("http://foo.bar/x/page.html", "FooBot"))
 
-        content = "user-agent: FooBot\n" "allow: /x/page.html\n" "disallow: /x/\n"
+        content = "user-agent: FooBot\nallow: /x/page.html\ndisallow: /x/\n"
         rp = Protego.parse(content=content)
         self.assertTrue(rp.can_fetch("http://foo.bar/x/page.html", "FooBot"))
         self.assertFalse(rp.can_fetch("http://foo.bar/x/", "FooBot"))
 
         # In case of equivalent disallow and allow patterns for the same
         # user-agent, allow is used.
-        content = "user-agent: FooBot\n" "disallow: \n" "allow: \n"
+        content = "user-agent: FooBot\ndisallow: \nallow: \n"
         rp = Protego.parse(content=content)
         self.assertTrue(rp.can_fetch("http://foo.bar/x/page.html", "FooBot"))
 
-        content = "user-agent: FooBot\n" "disallow: /\n" "allow: /\n"
+        content = "user-agent: FooBot\ndisallow: /\nallow: /\n"
         rp = Protego.parse(content=content)
         self.assertTrue(rp.can_fetch("http://foo.bar/x/page.html", "FooBot"))
 
-        content = "user-agent: FooBot\n" "disallow: /x\n" "allow: /x/\n"
+        content = "user-agent: FooBot\ndisallow: /x\nallow: /x/\n"
         rp = Protego.parse(content=content)
         self.assertTrue(rp.can_fetch("http://foo.bar/x/", "FooBot"))
         self.assertFalse(rp.can_fetch("http://foo.bar/x", "FooBot"))
 
-        content = (
-            "user-agent: FooBot\n" "disallow: /x/page.html\n" "allow: /x/page.html\n"
-        )
+        content = "user-agent: FooBot\ndisallow: /x/page.html\nallow: /x/page.html\n"
         rp = Protego.parse(content=content)
         self.assertTrue(rp.can_fetch("http://foo.bar/x/page.html", "FooBot"))
 
-        content = "user-agent: FooBot\n" "allow: /page\n" "disallow: /*.html\n"
+        content = "user-agent: FooBot\nallow: /page\ndisallow: /*.html\n"
         rp = Protego.parse(content=content)
         self.assertTrue(rp.can_fetch("http://foo.bar/page", "FooBot"))
         self.assertFalse(rp.can_fetch("http://foo.bar/page.html", "FooBot"))
 
-        content = "user-agent: FooBot\n" "allow: /x/page.\n" "disallow: /*.html\n"
+        content = "user-agent: FooBot\nallow: /x/page.\ndisallow: /*.html\n"
         rp = Protego.parse(content=content)
         # Longest match wins.
         self.assertTrue(rp.can_fetch("http://foo.bar/x/page.html", "FooBot"))
         self.assertFalse(rp.can_fetch("http://foo.bar/x/y.html", "FooBot"))
 
-        content = (
-            "User-agent: *\n" "Disallow: /x/\n" "User-agent: FooBot\n" "Disallow: /y/\n"
-        )
+        content = "User-agent: *\nDisallow: /x/\nUser-agent: FooBot\nDisallow: /y/\n"
         rp = Protego.parse(content=content)
         # Most specific group for FooBot allows implicitly /x/page.
         self.assertTrue(rp.can_fetch("http://foo.bar/x/page", "FooBot"))
         self.assertFalse(rp.can_fetch("http://foo.bar/y/page", "FooBot"))
 
-        content = "user-agent: FooBot\n" "allow: /p\n" "disallow: /\n"
+        content = "user-agent: FooBot\nallow: /p\ndisallow: /\n"
         rp = Protego.parse(content=content)
         self.assertTrue(rp.can_fetch("http://example.com/page", "FooBot"))
 
-        content = "user-agent: FooBot\n" "allow: /folder\n" "disallow: /folder\n"
+        content = "user-agent: FooBot\nallow: /folder\ndisallow: /folder\n"
         rp = Protego.parse(content=content)
         self.assertTrue(rp.can_fetch("http://example.com/folder/page", "FooBot"))
 
-        content = "user-agent: FooBot\n" "disallow: /folder\n" "allow: /folder\n"
+        content = "user-agent: FooBot\ndisallow: /folder\nallow: /folder\n"
         rp = Protego.parse(content=content)
         self.assertTrue(rp.can_fetch("http://example.com/folder/page", "FooBot"))
 
-        content = "user-agent: FooBot\n" "allow: /page\n" "disallow: /*.htm\n"
+        content = "user-agent: FooBot\nallow: /page\ndisallow: /*.htm\n"
         rp = Protego.parse(content=content)
         self.assertFalse(rp.can_fetch("http://example.com/page.htm", "FooBot"))
 
-        content = "user-agent: FooBot\n" "allow: /$\n" "disallow: /\n"
+        content = "user-agent: FooBot\nallow: /$\ndisallow: /\n"
         rp = Protego.parse(content=content)
         self.assertTrue(rp.can_fetch("http://example.com/", "FooBot"))
         self.assertFalse(rp.can_fetch("http://example.com/page.html", "FooBot"))
 
     def test_escaped_url(self):
-        content = "User-agent: * \n" "Disallow: / \n" "Allow: /a%3cd.html"
+        content = "User-agent: * \nDisallow: / \nAllow: /a%3cd.html"
         rp = Protego.parse(content=content)
         self.assertTrue(rp.can_fetch("https://www.site.local/a