mirror of
https://github.com/StevenBlack/hosts.git
synced 2024-07-02 18:45:52 +02:00
Introduce support for RAW lines.
This patch fixes https://github.com/StevenBlack/hosts/issues/1563#issuecomment-810688754. Before this patch, the updater did not support a RAW (non-hosts) list of domains as input. Changes: matches_exclusions(): (new) Support for rules formatted as 'example.com' along with the pre-existing '0.0.0.0 example.com'. normalize_rule(): (edit) Apply DRY. (new) Support the normalization of rules formatted as 'example.com' along with the pre-existing '0.0.0.0 example.com'. strip_rule(): (new) Complete rewrite in order to strip all possible lines.
This commit is contained in:
parent
261acf1dfa
commit
7971a0cbc9
|
@ -633,6 +633,30 @@ class TestMatchesExclusions(Base):
|
||||||
]:
|
]:
|
||||||
self.assertTrue(matches_exclusions(domain, exclusion_regexes))
|
self.assertTrue(matches_exclusions(domain, exclusion_regexes))
|
||||||
|
|
||||||
|
def test_match_raw_list(self):
|
||||||
|
exclusion_regexes = [r".*\.com", r".*\.org", r".*\.edu"]
|
||||||
|
exclusion_regexes = [re.compile(regex) for regex in exclusion_regexes]
|
||||||
|
|
||||||
|
for domain in [
|
||||||
|
"hulu.com",
|
||||||
|
"yahoo.com",
|
||||||
|
"adaway.org",
|
||||||
|
"education.edu",
|
||||||
|
]:
|
||||||
|
self.assertTrue(matches_exclusions(domain, exclusion_regexes))
|
||||||
|
|
||||||
|
def test_no_match_raw_list(self):
|
||||||
|
exclusion_regexes = [r".*\.org", r".*\.edu"]
|
||||||
|
exclusion_regexes = [re.compile(regex) for regex in exclusion_regexes]
|
||||||
|
|
||||||
|
for domain in [
|
||||||
|
"localhost",
|
||||||
|
"hulu.com",
|
||||||
|
"yahoo.com",
|
||||||
|
"cloudfront.net",
|
||||||
|
]:
|
||||||
|
self.assertFalse(matches_exclusions(domain, exclusion_regexes))
|
||||||
|
|
||||||
|
|
||||||
# End Exclusion Logic
|
# End Exclusion Logic
|
||||||
|
|
||||||
|
@ -806,13 +830,11 @@ class TestNormalizeRule(BaseStdout):
|
||||||
def test_no_match(self):
|
def test_no_match(self):
|
||||||
kwargs = dict(target_ip="0.0.0.0", keep_domain_comments=False)
|
kwargs = dict(target_ip="0.0.0.0", keep_domain_comments=False)
|
||||||
|
|
||||||
|
# Note: "Bare" domains are accepted. IPs are excluded.
|
||||||
for rule in [
|
for rule in [
|
||||||
"foo",
|
|
||||||
"128.0.0.1",
|
"128.0.0.1",
|
||||||
"bar.com/usa",
|
|
||||||
"0.0.0 google",
|
"0.0.0 google",
|
||||||
"0.1.2.3.4 foo/bar",
|
"0.1.2.3.4 foo/bar",
|
||||||
"twitter.com",
|
|
||||||
]:
|
]:
|
||||||
self.assertEqual(normalize_rule(rule, **kwargs), (None, None))
|
self.assertEqual(normalize_rule(rule, **kwargs), (None, None))
|
||||||
|
|
||||||
|
@ -874,13 +896,43 @@ class TestNormalizeRule(BaseStdout):
|
||||||
|
|
||||||
sys.stdout = StringIO()
|
sys.stdout = StringIO()
|
||||||
|
|
||||||
|
def test_no_comment_raw(self):
|
||||||
|
for rule in ("twitter.com", "google.com", "foo.bar.edu"):
|
||||||
|
expected = (rule, "0.0.0.0 " + rule + "\n")
|
||||||
|
|
||||||
class TestStripRule(Base):
|
actual = normalize_rule(
|
||||||
def test_strip_empty(self):
|
rule, target_ip="0.0.0.0", keep_domain_comments=False
|
||||||
for line in ["0.0.0.0", "domain.com", "foo"]:
|
)
|
||||||
output = strip_rule(line)
|
self.assertEqual(actual, expected)
|
||||||
|
|
||||||
|
# Nothing gets printed if there's a match.
|
||||||
|
output = sys.stdout.getvalue()
|
||||||
self.assertEqual(output, "")
|
self.assertEqual(output, "")
|
||||||
|
|
||||||
|
sys.stdout = StringIO()
|
||||||
|
|
||||||
|
def test_with_comments_raw(self):
|
||||||
|
for target_ip in ("0.0.0.0", "127.0.0.1", "8.8.8.8"):
|
||||||
|
for comment in ("foo", "bar", "baz"):
|
||||||
|
rule = "1.google.co.uk " + comment
|
||||||
|
expected = (
|
||||||
|
"1.google.co.uk",
|
||||||
|
(str(target_ip) + " 1.google.co.uk # " + comment + "\n"),
|
||||||
|
)
|
||||||
|
|
||||||
|
actual = normalize_rule(
|
||||||
|
rule, target_ip=target_ip, keep_domain_comments=True
|
||||||
|
)
|
||||||
|
self.assertEqual(actual, expected)
|
||||||
|
|
||||||
|
# Nothing gets printed if there's a match.
|
||||||
|
output = sys.stdout.getvalue()
|
||||||
|
self.assertEqual(output, "")
|
||||||
|
|
||||||
|
sys.stdout = StringIO()
|
||||||
|
|
||||||
|
|
||||||
|
class TestStripRule(Base):
|
||||||
def test_strip_exactly_two(self):
|
def test_strip_exactly_two(self):
|
||||||
for line in [
|
for line in [
|
||||||
"0.0.0.0 twitter.com",
|
"0.0.0.0 twitter.com",
|
||||||
|
@ -903,6 +955,28 @@ class TestStripRule(Base):
|
||||||
output = strip_rule(line + comment)
|
output = strip_rule(line + comment)
|
||||||
self.assertEqual(output, line + comment)
|
self.assertEqual(output, line + comment)
|
||||||
|
|
||||||
|
def test_strip_raw(self):
|
||||||
|
for line in [
|
||||||
|
"twitter.com",
|
||||||
|
"facebook.com",
|
||||||
|
"google.com",
|
||||||
|
"foo.bar.edu",
|
||||||
|
]:
|
||||||
|
output = strip_rule(line)
|
||||||
|
self.assertEqual(output, line)
|
||||||
|
|
||||||
|
def test_strip_raw_with_comment(self):
|
||||||
|
comment = " # comments here galore"
|
||||||
|
|
||||||
|
for line in [
|
||||||
|
"twitter.com",
|
||||||
|
"facebook.com",
|
||||||
|
"google.com",
|
||||||
|
"foo.bar.edu",
|
||||||
|
]:
|
||||||
|
output = strip_rule(line + comment)
|
||||||
|
self.assertEqual(output, line + comment)
|
||||||
|
|
||||||
|
|
||||||
class TestWriteOpeningHeader(BaseMockDir):
|
class TestWriteOpeningHeader(BaseMockDir):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
|
|
|
@ -20,6 +20,7 @@ import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
from glob import glob
|
from glob import glob
|
||||||
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
# Detecting Python 3 for version-dependent implementations
|
# Detecting Python 3 for version-dependent implementations
|
||||||
PY3 = sys.version_info >= (3, 0)
|
PY3 = sys.version_info >= (3, 0)
|
||||||
|
@ -629,7 +630,11 @@ def matches_exclusions(stripped_rule, exclusion_regexes):
|
||||||
Whether or not the rule string matches a provided exclusion.
|
Whether or not the rule string matches a provided exclusion.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
try:
|
||||||
stripped_domain = stripped_rule.split()[1]
|
stripped_domain = stripped_rule.split()[1]
|
||||||
|
except IndexError:
|
||||||
|
# Example: 'example.org' instead of '0.0.0.0 example.org'
|
||||||
|
stripped_domain = stripped_rule
|
||||||
|
|
||||||
for exclusionRegex in exclusion_regexes:
|
for exclusionRegex in exclusion_regexes:
|
||||||
if exclusionRegex.search(stripped_domain):
|
if exclusionRegex.search(stripped_domain):
|
||||||
|
@ -981,6 +986,35 @@ def normalize_rule(rule, target_ip, keep_domain_comments):
|
||||||
and spacing reformatted.
|
and spacing reformatted.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def normalize_response(extracted_hostname: str, extracted_suffix: Optional[str]) -> Tuple[str, str]:
|
||||||
|
"""
|
||||||
|
Normalizes the responses after the provision of the extracted
|
||||||
|
hostname and suffix - if they exist.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
extracted_hostname: str
|
||||||
|
The extracted hostname to work with.
|
||||||
|
extracted_suffix: str
|
||||||
|
The extracted suffix to work with.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
normalized_response: tuple
|
||||||
|
A tuple of the hostname and the rule string with spelling
|
||||||
|
and spacing reformatted.
|
||||||
|
"""
|
||||||
|
|
||||||
|
rule = "%s %s" % (target_ip, extracted_hostname)
|
||||||
|
|
||||||
|
if keep_domain_comments and extracted_suffix:
|
||||||
|
if not extracted_suffix.strip().startswith("#"):
|
||||||
|
rule += " #%s" % extracted_suffix
|
||||||
|
else:
|
||||||
|
rule += " %s" % extracted_suffix
|
||||||
|
|
||||||
|
return extracted_hostname, rule + "\n"
|
||||||
|
|
||||||
"""
|
"""
|
||||||
first try: IP followed by domain
|
first try: IP followed by domain
|
||||||
"""
|
"""
|
||||||
|
@ -992,15 +1026,8 @@ def normalize_rule(rule, target_ip, keep_domain_comments):
|
||||||
|
|
||||||
# Explicitly lowercase and trim the hostname.
|
# Explicitly lowercase and trim the hostname.
|
||||||
hostname = hostname.lower().strip()
|
hostname = hostname.lower().strip()
|
||||||
rule = "%s %s" % (target_ip, hostname)
|
|
||||||
|
|
||||||
if suffix and keep_domain_comments:
|
return normalize_response(hostname, suffix)
|
||||||
if not suffix.strip().startswith("#"):
|
|
||||||
rule += " #%s" % suffix
|
|
||||||
else:
|
|
||||||
rule += " %s" % suffix
|
|
||||||
|
|
||||||
return hostname, rule + "\n"
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
next try: IP address followed by host IP address
|
next try: IP address followed by host IP address
|
||||||
|
@ -1012,15 +1039,22 @@ def normalize_rule(rule, target_ip, keep_domain_comments):
|
||||||
ip_host, suffix = result.group(2, 3)
|
ip_host, suffix = result.group(2, 3)
|
||||||
# Explicitly trim the ip host.
|
# Explicitly trim the ip host.
|
||||||
ip_host = ip_host.strip()
|
ip_host = ip_host.strip()
|
||||||
rule = "%s %s" % (target_ip, ip_host)
|
|
||||||
|
|
||||||
if suffix and keep_domain_comments:
|
return normalize_response(ip_host, suffix)
|
||||||
if not suffix.strip().startswith("#"):
|
|
||||||
rule += " #%s" % suffix
|
|
||||||
else:
|
|
||||||
rule += " %s" % suffix
|
|
||||||
|
|
||||||
return ip_host, rule + "\n"
|
"""
|
||||||
|
next try: Keep RAW domain.
|
||||||
|
"""
|
||||||
|
regex = r"^\s*([\w\.-]+[a-zA-Z])(.*)"
|
||||||
|
result = re.search(regex, rule)
|
||||||
|
|
||||||
|
if result:
|
||||||
|
hostname, suffix = result.group(1, 2)
|
||||||
|
|
||||||
|
# Explicitly lowercase and trim the hostname.
|
||||||
|
hostname = hostname.lower().strip()
|
||||||
|
|
||||||
|
return normalize_response(hostname, suffix)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
finally, if we get here, just belch to screen
|
finally, if we get here, just belch to screen
|
||||||
|
@ -1044,12 +1078,7 @@ def strip_rule(line):
|
||||||
The sanitized rule.
|
The sanitized rule.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
split_line = line.split()
|
return " ".join(line.split())
|
||||||
if len(split_line) < 2:
|
|
||||||
# just return blank
|
|
||||||
return ""
|
|
||||||
else:
|
|
||||||
return " ".join(split_line)
|
|
||||||
|
|
||||||
|
|
||||||
def write_opening_header(final_file, **header_params):
|
def write_opening_header(final_file, **header_params):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user