From 7971a0cbc91adb86b771561ddf19a03cb687a7dc Mon Sep 17 00:00:00 2001 From: funilrys Date: Wed, 31 Mar 2021 14:51:54 +0200 Subject: [PATCH] Introduction of the support of RAW lines. This patch fixes https://github.com/StevenBlack/hosts/issues/1563#issuecomment-810688754 Indeed, before this patch, the updater was not supporting a RAW (not hosts) list of domains as input. Changes: matches_exclusions(): (new) Support for rule formatted as 'example.com' along with the pre-existing '0.0.0.0 example.com'. normalize_rule(): (edit) Apply DRY. (new) Support the normalization of the rule formatted as 'example.com' along with the pre-existing '0.0.0.0 example.com'. strip_rule(): (new) Complete rewrite in order to strip all possible lines. --- testUpdateHostsFile.py | 88 ++++++++++++++++++++++++++++++++++++++---- updateHostsFile.py | 73 ++++++++++++++++++++++++----------- 2 files changed, 132 insertions(+), 29 deletions(-) diff --git a/testUpdateHostsFile.py b/testUpdateHostsFile.py index 00669f09e..69597ee48 100644 --- a/testUpdateHostsFile.py +++ b/testUpdateHostsFile.py @@ -633,6 +633,30 @@ class TestMatchesExclusions(Base): ]: self.assertTrue(matches_exclusions(domain, exclusion_regexes)) + def test_match_raw_list(self): + exclusion_regexes = [r".*\.com", r".*\.org", r".*\.edu"] + exclusion_regexes = [re.compile(regex) for regex in exclusion_regexes] + + for domain in [ + "hulu.com", + "yahoo.com", + "adaway.org", + "education.edu", + ]: + self.assertTrue(matches_exclusions(domain, exclusion_regexes)) + + def test_no_match_raw_list(self): + exclusion_regexes = [r".*\.org", r".*\.edu"] + exclusion_regexes = [re.compile(regex) for regex in exclusion_regexes] + + for domain in [ + "localhost", + "hulu.com", + "yahoo.com", + "cloudfront.net", + ]: + self.assertFalse(matches_exclusions(domain, exclusion_regexes)) + # End Exclusion Logic @@ -806,13 +830,11 @@ class TestNormalizeRule(BaseStdout): def test_no_match(self): kwargs = dict(target_ip="0.0.0.0", 
keep_domain_comments=False) + # Note: "Bare"- Domains are accepted. IP are excluded. for rule in [ - "foo", "128.0.0.1", - "bar.com/usa", "0.0.0 google", "0.1.2.3.4 foo/bar", - "twitter.com", ]: self.assertEqual(normalize_rule(rule, **kwargs), (None, None)) @@ -874,13 +896,43 @@ class TestNormalizeRule(BaseStdout): sys.stdout = StringIO() + def test_no_comment_raw(self): + for rule in ("twitter.com", "google.com", "foo.bar.edu"): + expected = (rule, "0.0.0.0 " + rule + "\n") -class TestStripRule(Base): - def test_strip_empty(self): - for line in ["0.0.0.0", "domain.com", "foo"]: - output = strip_rule(line) + actual = normalize_rule( + rule, target_ip="0.0.0.0", keep_domain_comments=False + ) + self.assertEqual(actual, expected) + + # Nothing gets printed if there's a match. + output = sys.stdout.getvalue() self.assertEqual(output, "") + sys.stdout = StringIO() + + def test_with_comments_raw(self): + for target_ip in ("0.0.0.0", "127.0.0.1", "8.8.8.8"): + for comment in ("foo", "bar", "baz"): + rule = "1.google.co.uk " + comment + expected = ( + "1.google.co.uk", + (str(target_ip) + " 1.google.co.uk # " + comment + "\n"), + ) + + actual = normalize_rule( + rule, target_ip=target_ip, keep_domain_comments=True + ) + self.assertEqual(actual, expected) + + # Nothing gets printed if there's a match. 
+ output = sys.stdout.getvalue() + self.assertEqual(output, "") + + sys.stdout = StringIO() + + +class TestStripRule(Base): def test_strip_exactly_two(self): for line in [ "0.0.0.0 twitter.com", @@ -903,6 +955,28 @@ class TestStripRule(Base): output = strip_rule(line + comment) self.assertEqual(output, line + comment) + def test_strip_raw(self): + for line in [ + "twitter.com", + "facebook.com", + "google.com", + "foo.bar.edu", + ]: + output = strip_rule(line) + self.assertEqual(output, line) + + def test_strip_raw_with_comment(self): + comment = " # comments here galore" + + for line in [ + "twitter.com", + "facebook.com", + "google.com", + "foo.bar.edu", + ]: + output = strip_rule(line + comment) + self.assertEqual(output, line + comment) + class TestWriteOpeningHeader(BaseMockDir): def setUp(self): diff --git a/updateHostsFile.py b/updateHostsFile.py index 3741bb7e7..7301f587a 100644 --- a/updateHostsFile.py +++ b/updateHostsFile.py @@ -20,6 +20,7 @@ import sys import tempfile import time from glob import glob +from typing import Optional, Tuple # Detecting Python 3 for version-dependent implementations PY3 = sys.version_info >= (3, 0) @@ -629,7 +630,11 @@ def matches_exclusions(stripped_rule, exclusion_regexes): Whether or not the rule string matches a provided exclusion. """ - stripped_domain = stripped_rule.split()[1] + try: + stripped_domain = stripped_rule.split()[1] + except IndexError: + # Example: 'example.org' instead of '0.0.0.0 example.org' + stripped_domain = stripped_rule for exclusionRegex in exclusion_regexes: if exclusionRegex.search(stripped_domain): @@ -981,6 +986,35 @@ def normalize_rule(rule, target_ip, keep_domain_comments): and spacing reformatted. """ + def normalize_response(extracted_hostname: str, extracted_suffix: Optional[str]) -> Tuple[str, str]: + """ + Normalizes the responses after the provision of the extracted + hostname and suffix - if exist. 
+ + Parameters + ---------- + extracted_hostname: str + The extracted hostname to work with. + extracted_suffix: str + The extracted suffix to work with. + + Returns + ------- + normalized_response: tuple + A tuple of the hostname and the rule string with spelling + and spacing reformatted. + """ + + rule = "%s %s" % (target_ip, extracted_hostname) + + if keep_domain_comments and extracted_suffix: + if not extracted_suffix.strip().startswith("#"): + rule += " #%s" % extracted_suffix + else: + rule += " %s" % extracted_suffix + + return extracted_hostname, rule + "\n" + """ first try: IP followed by domain """ @@ -992,15 +1026,8 @@ def normalize_rule(rule, target_ip, keep_domain_comments): # Explicitly lowercase and trim the hostname. hostname = hostname.lower().strip() - rule = "%s %s" % (target_ip, hostname) - if suffix and keep_domain_comments: - if not suffix.strip().startswith("#"): - rule += " #%s" % suffix - else: - rule += " %s" % suffix - - return hostname, rule + "\n" + return normalize_response(hostname, suffix) """ next try: IP address followed by host IP address @@ -1012,15 +1039,22 @@ def normalize_rule(rule, target_ip, keep_domain_comments): ip_host, suffix = result.group(2, 3) # Explicitly trim the ip host. ip_host = ip_host.strip() - rule = "%s %s" % (target_ip, ip_host) - if suffix and keep_domain_comments: - if not suffix.strip().startswith("#"): - rule += " #%s" % suffix - else: - rule += " %s" % suffix + return normalize_response(ip_host, suffix) - return ip_host, rule + "\n" + """ + next try: Keep RAW domain. + """ + regex = r"^\s*([\w\.-]+[a-zA-Z])(.*)" + result = re.search(regex, rule) + + if result: + hostname, suffix = result.group(1, 2) + + # Explicitly lowercase and trim the hostname. + hostname = hostname.lower().strip() + + return normalize_response(hostname, suffix) """ finally, if we get here, just belch to screen @@ -1044,12 +1078,7 @@ def strip_rule(line): The sanitized rule. 
""" - split_line = line.split() - if len(split_line) < 2: - # just return blank - return "" - else: - return " ".join(split_line) + return " ".join(line.split()) def write_opening_header(final_file, **header_params):