Add hosts file variants that include only the extensions (without the unified hosts base)

This commit is contained in:
Dennis van de Hoef 2023-05-23 20:03:43 +02:00 committed by Steven Black
parent a4d9fb6c4c
commit 6d48930c6f
No known key found for this signature in database
GPG Key ID: 8B814B914974E6AF
5 changed files with 207 additions and 66 deletions

View File

@ -52,6 +52,7 @@ def update_readme_file():
if subprocess.call([sys.executable, "updateReadme.py"]):
print_failure("Failed to update readme file")
def recursively_loop_extensions(extension, extensions, current_extensions):
"""
Helper function that recursively calls itself to prevent manually creating
@ -59,6 +60,7 @@ def recursively_loop_extensions(extension, extensions, current_extensions):
Will call update_hosts_file for all combinations of extensions
"""
c_extensions = extensions.copy()
c_current_extensions = current_extensions.copy()
c_current_extensions.append(extension)
@ -68,6 +70,9 @@ def recursively_loop_extensions(extension, extensions, current_extensions):
params = ("-a", "-n", "-o", "alternates/"+name, "-e") + tuple(c_current_extensions)
update_hosts_file(*params)
params = ("-a", "-n", "-s", "--nounifiedhosts", "-o", "alternates/"+name+"-only", "-e") + tuple(c_current_extensions)
update_hosts_file(*params)
while len(c_extensions) > 0:
recursively_loop_extensions(c_extensions.pop(0), c_extensions, c_current_extensions)

View File

@ -20,7 +20,7 @@ sources can be found in the `hosts/data/` directory.
[![last commit](https://img.shields.io/github/last-commit/StevenBlack/hosts.svg)](https://github.com/StevenBlack/hosts/commits/master)
[![commit activity](https://img.shields.io/github/commit-activity/y/StevenBlack/hosts.svg)](https://github.com/StevenBlack/hosts/commits/master)
# Unified hosts file @EXTENSIONS_HEADER@
# @EXTENSIONS_HEADER@
This repository consolidates several reputable `hosts` files, and merges them
into a unified hosts file with duplicates removed. A variety of tailored hosts
@ -41,7 +41,7 @@ files are provided.
This repository offers
[15 different host file variants](https://github.com/StevenBlack/hosts/tree/master/alternates),
in addition to the base variant.
in addition to the base variant, with and without the unified hosts included.
The **Non GitHub mirror** is the link to use for some hosts file managers like
[Hostsman for Windows](https://www.abelhadigital.com/hostsman/) that don't work
@ -213,6 +213,9 @@ readmeData.json file used for generating readme.md files. This is useful if you
are generating host files with additional whitelists or blacklists and want to
keep your local checkout of this repo unmodified.
`--nounifiedhosts`: `false` (default) or `true`, do not include the unified hosts
file in the final hosts file. Usually used together with `--extensions`.
`--compress`, or `-c`: `false` (default) or `true`, _Compress_ the hosts file
ignoring non-necessary lines (empty lines and comments) and putting multiple
domains in each line. Reducing the number of lines of the hosts file improves

View File

@ -113,6 +113,7 @@ class TestGetDefaults(Base):
"keepdomaincomments": True,
"extensionspath": "foo" + self.sep + "extensions",
"extensions": [],
"nounifiedhosts": False,
"compress": False,
"minimise": False,
"outputsubfolder": "",
@ -679,6 +680,7 @@ class TestUpdateSourcesData(Base):
datapath=self.data_path,
extensionspath=self.extensions_path,
sourcedatafilename=self.source_data_filename,
nounifiedhosts=False,
)
def update_sources_data(self, sources_data, extensions):
@ -990,7 +992,7 @@ class TestWriteOpeningHeader(BaseMockDir):
def test_missing_keyword(self):
kwargs = dict(
extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=False
extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=False, nounifiedhosts=False
)
for k in kwargs.keys():
@ -1003,7 +1005,7 @@ class TestWriteOpeningHeader(BaseMockDir):
def test_basic(self):
kwargs = dict(
extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=True
extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=True, nounifiedhosts=False
)
write_opening_header(self.final_file, **kwargs)
@ -1032,7 +1034,7 @@ class TestWriteOpeningHeader(BaseMockDir):
def test_basic_include_static_hosts(self):
kwargs = dict(
extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=False
extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=False, nounifiedhosts=False
)
with self.mock_property("platform.system") as obj:
obj.return_value = "Windows"
@ -1059,7 +1061,7 @@ class TestWriteOpeningHeader(BaseMockDir):
def test_basic_include_static_hosts_linux(self):
kwargs = dict(
extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=False
extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=False, nounifiedhosts=False
)
with self.mock_property("platform.system") as system:
system.return_value = "Linux"
@ -1096,6 +1098,7 @@ class TestWriteOpeningHeader(BaseMockDir):
outputsubfolder="",
numberofrules=5,
skipstatichosts=True,
nounifiedhosts=False,
)
write_opening_header(self.final_file, **kwargs)
@ -1123,6 +1126,41 @@ class TestWriteOpeningHeader(BaseMockDir):
):
self.assertNotIn(expected, contents)
def test_no_unified_hosts(self):
    """
    Check the opening header produced when ``nounifiedhosts=True``.

    Asserts the header states the unified hosts file was not used and
    lists the extensions used to generate the file, and also asserts
    that the static localhost entries are absent (``skipstatichosts``
    is True in these kwargs).
    """
    kwargs = dict(
        extensions=["epsilon", "gamma"],
        outputsubfolder="",
        numberofrules=5,
        skipstatichosts=True,
        nounifiedhosts=True,
    )

    write_opening_header(self.final_file, **kwargs)

    contents = self.final_file.getvalue()
    contents = contents.decode("UTF-8")

    # Expected contents.
    for expected in (
        ", ".join(kwargs["extensions"]),
        "# The unified hosts file was not used while generating this file.",
        "# Extensions used to generate this file:",
        "# This hosts file is a merged collection",
        "# with a dash of crowd sourcing via GitHub",
        "# Number of unique domains: {count}".format(count=kwargs["numberofrules"]),
        "Fetch the latest version of this file:",
        "Project home page: https://github.com/StevenBlack/hosts",
    ):
        self.assertIn(expected, contents)

    # Expected non-contents.
    for expected in (
        "127.0.0.1 localhost",
        "127.0.0.1 local",
        "127.0.0.53",
        "127.0.1.1",
    ):
        self.assertNotIn(expected, contents)
def _check_preamble(self, check_copy):
hosts_file = os.path.join(self.test_dir, "myhosts")
hosts_file += ".example" if check_copy else ""
@ -1131,7 +1169,7 @@ class TestWriteOpeningHeader(BaseMockDir):
f.write("peter-piper-picked-a-pepper")
kwargs = dict(
extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=True
extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=True, nounifiedhosts=False
)
with self.mock_property("updateHostsFile.BASEDIR_PATH"):
@ -1180,7 +1218,7 @@ class TestUpdateReadmeData(BaseMockDir):
def test_missing_keyword(self):
kwargs = dict(
extensions="", outputsubfolder="", numberofrules="", sourcesdata=""
extensions="", outputsubfolder="", numberofrules="", sourcesdata="", nounifiedhosts=False
)
for k in kwargs.keys():
@ -1196,7 +1234,7 @@ class TestUpdateReadmeData(BaseMockDir):
json.dump({"foo": "bar"}, f)
kwargs = dict(
extensions=None, outputsubfolder="foo", numberofrules=5, sourcesdata="hosts"
extensions=None, outputsubfolder="foo", numberofrules=5, sourcesdata="hosts", nounifiedhosts=False
)
update_readme_data(self.readme_file, **kwargs)
@ -1206,7 +1244,7 @@ class TestUpdateReadmeData(BaseMockDir):
sep = self.sep
expected = {
"base": {"location": "foo" + sep, "sourcesdata": "hosts", "entries": 5},
"base": {"location": "foo" + sep, 'no_unified_hosts': False, "sourcesdata": "hosts", "entries": 5},
"foo": "bar",
}
@ -1219,7 +1257,7 @@ class TestUpdateReadmeData(BaseMockDir):
json.dump({"base": "soprano"}, f)
kwargs = dict(
extensions=None, outputsubfolder="foo", numberofrules=5, sourcesdata="hosts"
extensions=None, outputsubfolder="foo", numberofrules=5, sourcesdata="hosts", nounifiedhosts=False
)
update_readme_data(self.readme_file, **kwargs)
@ -1229,7 +1267,7 @@ class TestUpdateReadmeData(BaseMockDir):
sep = self.sep
expected = {
"base": {"location": "foo" + sep, "sourcesdata": "hosts", "entries": 5}
"base": {"location": "foo" + sep, 'no_unified_hosts': False, "sourcesdata": "hosts", "entries": 5},
}
with open(self.readme_file, "r") as f:
@ -1245,6 +1283,7 @@ class TestUpdateReadmeData(BaseMockDir):
outputsubfolder="foo",
numberofrules=5,
sourcesdata="hosts",
nounifiedhosts=False,
)
update_readme_data(self.readme_file, **kwargs)
@ -1254,7 +1293,33 @@ class TestUpdateReadmeData(BaseMockDir):
sep = self.sep
expected = {
"com-org": {"location": "foo" + sep, "sourcesdata": "hosts", "entries": 5}
"com-org": {"location": "foo" + sep, 'no_unified_hosts': False, "sourcesdata": "hosts", "entries": 5}
}
with open(self.readme_file, "r") as f:
actual = json.load(f)
self.assertEqual(actual, expected)
def test_set_no_unified_hosts(self):
with open(self.readme_file, "w") as f:
json.dump({}, f)
kwargs = dict(
extensions=["com", "org"],
outputsubfolder="foo",
numberofrules=5,
sourcesdata="hosts",
nounifiedhosts=True,
)
update_readme_data(self.readme_file, **kwargs)
if platform.system().lower() == "windows":
sep = "/"
else:
sep = self.sep
expected = {
"com-org-only": {"location": "foo" + sep, 'no_unified_hosts': True, "sourcesdata": "hosts", "entries": 5}
}
with open(self.readme_file, "r") as f:
@ -1424,52 +1489,53 @@ class TestFlushDnsCache(BaseStdout):
class TestRemoveOldHostsFile(BaseMockDir):
def setUp(self):
super(TestRemoveOldHostsFile, self).setUp()
self.hosts_file = os.path.join(self.test_dir, "hosts")
self.hosts_file = "hosts"
self.full_hosts_path = os.path.join(self.test_dir, "hosts")
def test_remove_hosts_file(self):
old_dir_count = self.dir_count
remove_old_hosts_file(self.hosts_file, backup=False)
remove_old_hosts_file(self.test_dir, self.hosts_file, backup=False)
new_dir_count = old_dir_count + 1
self.assertEqual(self.dir_count, new_dir_count)
with open(self.hosts_file, "r") as f:
with open(self.full_hosts_path, "r") as f:
contents = f.read()
self.assertEqual(contents, "")
def test_remove_hosts_file_exists(self):
with open(self.hosts_file, "w") as f:
with open(self.full_hosts_path, "w") as f:
f.write("foo")
old_dir_count = self.dir_count
remove_old_hosts_file(self.hosts_file, backup=False)
remove_old_hosts_file(self.test_dir, self.hosts_file, backup=False)
new_dir_count = old_dir_count
self.assertEqual(self.dir_count, new_dir_count)
with open(self.hosts_file, "r") as f:
with open(self.full_hosts_path, "r") as f:
contents = f.read()
self.assertEqual(contents, "")
@mock.patch("time.strftime", return_value="new")
def test_remove_hosts_file_backup(self, _):
with open(self.hosts_file, "w") as f:
with open(self.full_hosts_path, "w") as f:
f.write("foo")
old_dir_count = self.dir_count
remove_old_hosts_file(self.hosts_file, backup=True)
remove_old_hosts_file(self.test_dir, self.hosts_file, backup=True)
new_dir_count = old_dir_count + 1
self.assertEqual(self.dir_count, new_dir_count)
with open(self.hosts_file, "r") as f:
with open(self.full_hosts_path, "r") as f:
contents = f.read()
self.assertEqual(contents, "")
new_hosts_file = self.hosts_file + "-new"
new_hosts_file = self.full_hosts_path + "-new"
with open(new_hosts_file, "r") as f:
contents = f.read()

View File

@ -72,6 +72,7 @@ def get_defaults():
"keepdomaincomments": True,
"extensionspath": path_join_robust(BASEDIR_PATH, "extensions"),
"extensions": [],
"nounifiedhosts": False,
"compress": False,
"minimise": False,
"outputsubfolder": "",
@ -124,6 +125,13 @@ def main():
nargs="*",
help="Host extensions to include in the final hosts file.",
)
parser.add_argument(
"--nounifiedhosts",
dest="nounifiedhosts",
default=False,
action="store_true",
help="Do not include the unified hosts file in the final hosts file. Usually used together with `--extensions`.",
)
parser.add_argument(
"--ip",
"-i",
@ -248,6 +256,7 @@ def main():
auto = settings["auto"]
exclusion_regexes = settings["exclusionregexes"]
source_data_filename = settings["sourcedatafilename"]
no_unified_hosts = settings["nounifiedhosts"]
update_sources = prompt_for_update(freshen=settings["freshen"], update_auto=auto)
if update_sources:
@ -271,9 +280,12 @@ def main():
extensions=extensions,
extensionspath=extensions_path,
sourcedatafilename=source_data_filename,
nounifiedhosts=no_unified_hosts,
)
merge_file = create_initial_file()
merge_file = create_initial_file(
nounifiedhosts=no_unified_hosts,
)
remove_old_hosts_file(settings["outputpath"], "hosts", settings["backup"])
if settings["compress"]:
final_file = open(path_join_robust(settings["outputpath"], "hosts"), "w+b")
@ -298,6 +310,7 @@ def main():
numberofrules=number_of_rules,
outputsubfolder=output_subfolder,
skipstatichosts=skip_static_hosts,
nounifiedhosts=no_unified_hosts,
)
final_file.close()
@ -308,6 +321,7 @@ def main():
numberofrules=number_of_rules,
outputsubfolder=output_subfolder,
sourcesdata=sources_data,
nounifiedhosts=no_unified_hosts,
)
print_success(
@ -666,6 +680,7 @@ def update_sources_data(sources_data, **sources_params):
2) extensions
3) extensionspath
4) sourcedatafilename
5) nounifiedhosts
Returns
-------
@ -675,13 +690,14 @@ def update_sources_data(sources_data, **sources_params):
source_data_filename = sources_params["sourcedatafilename"]
for source in sort_sources(
recursive_glob(sources_params["datapath"], source_data_filename)
):
update_file = open(source, "r", encoding="UTF-8")
update_data = json.load(update_file)
sources_data.append(update_data)
update_file.close()
if not sources_params["nounifiedhosts"]:
for source in sort_sources(
recursive_glob(sources_params["datapath"], source_data_filename)
):
update_file = open(source, "r", encoding="UTF-8")
update_data = json.load(update_file)
sources_data.append(update_data)
update_file.close()
for source in sources_params["extensions"]:
source_dir = path_join_robust(sources_params["extensionspath"], source)
@ -776,23 +792,32 @@ def update_all_sources(source_data_filename, host_filename):
# File Logic
def create_initial_file():
def create_initial_file(**initial_file_params):
"""
Initialize the file in which we merge all host files for later pruning.
Parameters
----------
header_params : kwargs
Dictionary providing additional parameters for populating the initial file
information. Currently, those fields are:
1) nounifiedhosts
"""
merge_file = tempfile.NamedTemporaryFile()
# spin the sources for the base file
for source in sort_sources(
recursive_glob(settings["datapath"], settings["hostfilename"])
):
if not initial_file_params["nounifiedhosts"]:
# spin the sources for the base file
for source in sort_sources(
recursive_glob(settings["datapath"], settings["hostfilename"])
):
start = "# Start {}\n\n".format(os.path.basename(os.path.dirname(source)))
end = "\n# End {}\n\n".format(os.path.basename(os.path.dirname(source)))
start = "# Start {}\n\n".format(os.path.basename(os.path.dirname(source)))
end = "\n# End {}\n\n".format(os.path.basename(os.path.dirname(source)))
with open(source, "r", encoding="UTF-8") as curFile:
write_data(merge_file, start + curFile.read() + end)
with open(source, "r", encoding="UTF-8") as curFile:
write_data(merge_file, start + curFile.read() + end)
# spin the sources for extensions to the base file
for source in settings["extensions"]:
@ -1113,6 +1138,7 @@ def write_opening_header(final_file, **header_params):
2) numberofrules
3) outputsubfolder
4) skipstatichosts
5) nounifiedhosts
"""
final_file.seek(0) # Reset file pointer.
@ -1120,22 +1146,41 @@ def write_opening_header(final_file, **header_params):
final_file.seek(0) # Write at the top.
no_unified_hosts = header_params["nounifiedhosts"]
if header_params["extensions"]:
if len(header_params["extensions"]) > 1:
write_data(
final_file,
"# Title: StevenBlack/hosts with the {0} and {1} extensions\n#\n".format(
", ".join(header_params["extensions"][:-1]),
header_params["extensions"][-1],
),
)
if no_unified_hosts:
if len(header_params["extensions"]) > 1:
write_data(
final_file,
"# Title: StevenBlack/hosts extensions {0} and {1} \n#\n".format(
", ".join(header_params["extensions"][:-1]),
header_params["extensions"][-1],
),
)
else:
write_data(
final_file,
"# Title: StevenBlack/hosts extension {0}\n#\n".format(
", ".join(header_params["extensions"])
),
)
else:
write_data(
final_file,
"# Title: StevenBlack/hosts with the {0} extension\n#\n".format(
", ".join(header_params["extensions"])
),
)
if len(header_params["extensions"]) > 1:
write_data(
final_file,
"# Title: StevenBlack/hosts with the {0} and {1} extensions\n#\n".format(
", ".join(header_params["extensions"][:-1]),
header_params["extensions"][-1],
),
)
else:
write_data(
final_file,
"# Title: StevenBlack/hosts with the {0} extension\n#\n".format(
", ".join(header_params["extensions"])
),
)
else:
write_data(final_file, "# Title: StevenBlack/hosts\n#\n")
@ -1151,12 +1196,21 @@ def write_opening_header(final_file, **header_params):
)
if header_params["extensions"]:
write_data(
final_file,
"# Extensions added to this file: "
+ ", ".join(header_params["extensions"])
+ "\n",
)
if header_params["nounifiedhosts"]:
write_data(
final_file,
"# The unified hosts file was not used while generating this file.\n"
"# Extensions used to generate this file: "
+ ", ".join(header_params["extensions"])
+ "\n",
)
else:
write_data(
final_file,
"# Extensions added to this file: "
+ ", ".join(header_params["extensions"])
+ "\n",
)
write_data(
final_file,
@ -1234,17 +1288,22 @@ def update_readme_data(readme_file, **readme_updates):
2) sourcesdata
3) numberofrules
4) outputsubfolder
5) nounifiedhosts
"""
extensions_key = "base"
extensions = readme_updates["extensions"]
no_unified_hosts = readme_updates["nounifiedhosts"]
if extensions:
extensions_key = "-".join(extensions)
if no_unified_hosts:
extensions_key = extensions_key + "-only"
output_folder = readme_updates["outputsubfolder"]
generation_data = {
"location": path_join_robust(output_folder, ""),
"no_unified_hosts": no_unified_hosts,
"entries": readme_updates["numberofrules"],
"sourcesdata": readme_updates["sourcesdata"],
}

View File

@ -32,7 +32,7 @@ def main():
keys = list(data.keys())
# Sort by the number of en-dashes in the key
# and then by the key string itself.
keys.sort(key=lambda item: (item.count("-"), item))
keys.sort(key=lambda item: (item.replace("-only", "").count("-"), item.replace("-only", "")))
toc_rows = ""
for key in keys:
@ -40,9 +40,14 @@ def main():
if key == "base":
data[key]["description"] = "Unified hosts = **(adware + malware)**"
else:
data[key]["description"] = (
"Unified hosts **+ " + key.replace("-", " + ") + "**"
)
if data[key]["no_unified_hosts"]:
data[key]["description"] = (
"**" + key.replace("-only", "").replace("-", " + ") + "**"
)
else:
data[key]["description"] = (
"Unified hosts **+ " + key.replace("-", " + ") + "**"
)
if "\\" in data[key]["location"]:
data[key]["location"] = data[key]["location"].replace("\\", "/")
@ -64,9 +69,12 @@ def main():
)
size_history_graph = "![Size history](https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts_file_size_history.png)"
for key in keys:
extensions = key.replace("-", ", ")
extensions = key.replace("-only", "").replace("-", ", ")
extensions_str = "* Extensions: **" + extensions + "**."
extensions_header = "with " + extensions + " extensions"
if data[key]["no_unified_hosts"]:
extensions_header = "Limited to the extensions: " + extensions
else:
extensions_header = "Unified hosts file with " + extensions + " extensions"
source_rows = ""
source_list = data[key]["sourcesdata"]