Introduce support for RAW lines.

This patch fixes https://github.com/StevenBlack/hosts/issues/1563#issuecomment-810688754. Before this patch, the updater did not support a RAW (i.e. not hosts-formatted) list of domains as input. Changes: matches_exclusions(): (new) Support for rules formatted as 'example.com' along with the pre-existing '0.0.0.0 example.com'. normalize_rule(): (edit) Apply DRY. (new) Support the normalization of rules formatted as 'example.com' along with the pre-existing '0.0.0.0 example.com'. strip_rule(): (new) Complete rewrite in order to strip all possible lines.
2024-07-02 18:45:52 +02:00 · 2021-03-31 14:51:54 +02:00 · 2021-03-31 14:51:54 +02:00 · 7971a0cbc9
commit 7971a0cbc9
parent 261acf1dfa
2 changed files with 132 additions and 29 deletions
--- a/testUpdateHostsFile.py
+++ b/testUpdateHostsFile.py
@ -633,6 +633,30 @@ class TestMatchesExclusions(Base):
        ]:
            self.assertTrue(matches_exclusions(domain, exclusion_regexes))
    def test_match_raw_list(self):
        """Raw domains (no leading IP) that hit an exclusion pattern match."""
        patterns = (r".*\.com", r".*\.org", r".*\.edu")
        exclusion_regexes = [re.compile(pattern) for pattern in patterns]
        raw_domains = ("hulu.com", "yahoo.com", "adaway.org", "education.edu")
        for raw_domain in raw_domains:
            # Every entry ends in one of the excluded TLD patterns above.
            self.assertTrue(matches_exclusions(raw_domain, exclusion_regexes))
    def test_no_match_raw_list(self):
        """Raw domains (no leading IP) outside every exclusion do not match."""
        patterns = (r".*\.org", r".*\.edu")
        exclusion_regexes = [re.compile(pattern) for pattern in patterns]
        raw_domains = ("localhost", "hulu.com", "yahoo.com", "cloudfront.net")
        for raw_domain in raw_domains:
            # None of these entries ends in ".org" or ".edu".
            self.assertFalse(matches_exclusions(raw_domain, exclusion_regexes))
 # End Exclusion Logic
@ -806,13 +830,11 @@ class TestNormalizeRule(BaseStdout):
    def test_no_match(self):
        """Each listed rule is expected to normalize to (None, None)."""
        kwargs = dict(target_ip="0.0.0.0", keep_domain_comments=False)
        # Note: "Bare"- Domains are accepted. IP are excluded.
        # NOTE(review): "foo", "bar.com/usa" and "twitter.com" look like they
        # match the RAW-domain pattern this patch adds to normalize_rule()
        # (r"^\s*([\w\.-]+[a-zA-Z])(.*)"), in which case they would no longer
        # yield (None, None) -- confirm whether these entries are the ones the
        # patch removes from this list.
        for rule in [
            "foo",
            "128.0.0.1",
            "bar.com/usa",
            "0.0.0 google",
            "0.1.2.3.4 foo/bar",
            "twitter.com",
        ]:
            self.assertEqual(normalize_rule(rule, **kwargs), (None, None))
@ -874,13 +896,43 @@ class TestNormalizeRule(BaseStdout):
            sys.stdout = StringIO()
    def test_no_comment_raw(self):
        for rule in ("twitter.com", "google.com", "foo.bar.edu"):
            expected = (rule, "0.0.0.0 " + rule + "\n")
-class TestStripRule(Base):
+            actual = normalize_rule(
-    def test_strip_empty(self):
+                rule, target_ip="0.0.0.0", keep_domain_comments=False
-        for line in ["0.0.0.0", "domain.com", "foo"]:
+            )
-            output = strip_rule(line)
+            self.assertEqual(actual, expected)
            # Nothing gets printed if there's a match.
            output = sys.stdout.getvalue()
            self.assertEqual(output, "")
            sys.stdout = StringIO()
    def test_with_comments_raw(self):
        """Raw domain plus trailing text keeps the text as a '#' comment."""
        hostname = "1.google.co.uk"
        target_ips = ("0.0.0.0", "127.0.0.1", "8.8.8.8")
        comments = ("foo", "bar", "baz")
        for target_ip in target_ips:
            for comment in comments:
                raw_rule = hostname + " " + comment
                expected_line = (
                    str(target_ip) + " " + hostname + " # " + comment + "\n"
                )
                result = normalize_rule(
                    raw_rule, target_ip=target_ip, keep_domain_comments=True
                )
                self.assertEqual(result, (hostname, expected_line))
                # A successful normalization must leave stdout empty.
                self.assertEqual(sys.stdout.getvalue(), "")
                # Start from a fresh capture buffer for the next combination.
                sys.stdout = StringIO()
 class TestStripRule(Base):
    def test_strip_exactly_two(self):
        for line in [
            "0.0.0.0 twitter.com",
@ -903,6 +955,28 @@ class TestStripRule(Base):
            output = strip_rule(line + comment)
            self.assertEqual(output, line + comment)
    def test_strip_raw(self):
        """A bare domain is expected to come back from strip_rule() unchanged."""
        raw_domains = (
            "twitter.com",
            "facebook.com",
            "google.com",
            "foo.bar.edu",
        )
        for raw_domain in raw_domains:
            self.assertEqual(strip_rule(raw_domain), raw_domain)
    def test_strip_raw_with_comment(self):
        """A bare domain followed by a comment is expected to stay unchanged."""
        comment = " # comments here galore"
        raw_domains = (
            "twitter.com",
            "facebook.com",
            "google.com",
            "foo.bar.edu",
        )
        for raw_domain in raw_domains:
            commented_rule = raw_domain + comment
            self.assertEqual(strip_rule(commented_rule), commented_rule)
 class TestWriteOpeningHeader(BaseMockDir):
    def setUp(self):
--- a/updateHostsFile.py
+++ b/updateHostsFile.py
@ -20,6 +20,7 @@ import sys
 import tempfile
 import time
 from glob import glob
 from typing import Optional, Tuple
 # Detecting Python 3 for version-dependent implementations
 PY3 = sys.version_info >= (3, 0)
@ -629,7 +630,11 @@ def matches_exclusions(stripped_rule, exclusion_regexes):
        Whether or not the rule string matches a provided exclusion.
    """
    try:
        stripped_domain = stripped_rule.split()[1]
    except IndexError:
        # Example: 'example.org' instead of '0.0.0.0 example.org'
        stripped_domain = stripped_rule
    for exclusionRegex in exclusion_regexes:
        if exclusionRegex.search(stripped_domain):
@ -981,6 +986,35 @@ def normalize_rule(rule, target_ip, keep_domain_comments):
        and spacing reformatted.
    """
    def normalize_response(
        extracted_hostname: str, extracted_suffix: Optional[str]
    ) -> Tuple[str, str]:
        """
        Build the normalized response from an extracted hostname and its
        suffix, if there is one.

        Parameters
        ----------
        extracted_hostname: str
            The extracted hostname to work with.
        extracted_suffix: str
            The extracted suffix to work with. May be empty or None.

        Returns
        -------
        normalized_response: tuple
            A tuple of the hostname and the rule string
            ("<target_ip> <hostname>", newline-terminated). The suffix is
            appended only when `keep_domain_comments` is truthy and the
            suffix is non-empty.
        """
        normalized = "%s %s" % (target_ip, extracted_hostname)

        # Without a comment to keep, the rule is just the IP and the hostname.
        if not keep_domain_comments or not extracted_suffix:
            return extracted_hostname, normalized + "\n"

        # A suffix that does not already start with "#" gets one prepended.
        if extracted_suffix.strip().startswith("#"):
            comment_format = " %s"
        else:
            comment_format = " #%s"

        normalized += comment_format % extracted_suffix
        return extracted_hostname, normalized + "\n"
    """
    first try: IP followed by domain
    """
@ -992,15 +1026,8 @@ def normalize_rule(rule, target_ip, keep_domain_comments):
        # Explicitly lowercase and trim the hostname.
        hostname = hostname.lower().strip()
        rule = "%s %s" % (target_ip, hostname)
-        if suffix and keep_domain_comments:
+        return normalize_response(hostname, suffix)
            if not suffix.strip().startswith("#"):
                rule += " #%s" % suffix
            else:
                rule += " %s" % suffix
        return hostname, rule + "\n"
    """
    next try: IP address followed by host IP address
@ -1012,15 +1039,22 @@ def normalize_rule(rule, target_ip, keep_domain_comments):
        ip_host, suffix = result.group(2, 3)
        # Explicitly trim the ip host.
        ip_host = ip_host.strip()
        rule = "%s %s" % (target_ip, ip_host)
-        if suffix and keep_domain_comments:
+        return normalize_response(ip_host, suffix)
            if not suffix.strip().startswith("#"):
                rule += " #%s" % suffix
            else:
                rule += " %s" % suffix
-        return ip_host, rule + "\n"
+    """
    next try: Keep RAW domain.
    """
    regex = r"^\s*([\w\.-]+[a-zA-Z])(.*)"
    result = re.search(regex, rule)
    if result:
        hostname, suffix = result.group(1, 2)
        # Explicitly lowercase and trim the hostname.
        hostname = hostname.lower().strip()
        return normalize_response(hostname, suffix)
    """
    finally, if we get here, just belch to screen
@ -1044,12 +1078,7 @@ def strip_rule(line):
        The sanitized rule.
    """
-    split_line = line.split()
+    return " ".join(line.split())
    if len(split_line) < 2:
        # just return blank
        return ""
    else:
        return " ".join(split_line)
 def write_opening_header(final_file, **header_params):