mirror of
https://github.com/StevenBlack/hosts.git
synced 2024-07-02 18:45:52 +02:00
Merge pull request #520 from funilrys/notDavid-protocol
Possible fix of the encoding and/or download issue(s)
This commit is contained in:
commit
aa6b09561b
|
@ -5,4 +5,4 @@ conda create -n hosts python=$PYTHON_VERSION || exit 1
|
|||
source activate hosts
|
||||
|
||||
echo "Installing packages..."
|
||||
conda install mock flake8
|
||||
conda install mock flake8 beautifulsoup4 lxml
|
||||
|
|
|
@ -39,14 +39,20 @@ folders.
|
|||
|
||||
## Generate your own unified hosts file
|
||||
|
||||
**Note** if you are using Python 3, please install the dependencies with:
|
||||
|
||||
pip3 install --user -r requirements.txt
|
||||
|
||||
**Note** if you are using Python 2, please install the dependencies with:
|
||||
|
||||
pip2 install --user -r requirements_python2.txt
|
||||
|
||||
**Note** we recommend the `--user` flag which installs the required dependencies at the user level. More information about it can be found on pip [documentation](https://pip.pypa.io/en/stable/reference/pip_install/?highlight=--user#cmdoption-user).
|
||||
|
||||
To run unit tests, in the top level directory, just run:
|
||||
|
||||
python testUpdateHostsFile.py
|
||||
|
||||
**Note** if you are using Python 2, you must first install the `mock` library:
|
||||
|
||||
pip install mock
|
||||
|
||||
The `updateHostsFile.py` script, which is Python 2.7 and Python 3-compatible,
|
||||
will generate a unified hosts file based on the sources in the local `data/`
|
||||
subfolder. The script will prompt you whether it should fetch updated
|
||||
|
@ -104,9 +110,9 @@ in a subfolder. If the subfolder does not exist, it will be created.
|
|||
section at the top, containing lines like `127.0.0.1 localhost`. This is
|
||||
useful for configuring proximate DNS services on the local network.
|
||||
|
||||
`--compress`, or `-c`: `false` (default) or `true`, *Compress* the hosts file
|
||||
ignoring non-necessary lines (empty lines and comments) and putting multiple
|
||||
domains in each line. Reducing the number of lines of the hosts file improves
|
||||
`--compress`, or `-c`: `false` (default) or `true`, *Compress* the hosts file
|
||||
ignoring non-necessary lines (empty lines and comments) and putting multiple
|
||||
domains in each line. Reducing the number of lines of the hosts file improves
|
||||
performance under Windows (with DNS Client service enabled).
|
||||
|
||||
`--minimise`, or `-m`: `false` (default) or `true`, like `--compress`, but puts
|
||||
|
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
|
@ -0,0 +1,3 @@
|
|||
lxml==4.1.1
|
||||
beautifulsoup4==4.6.0
|
||||
mock==2.0.0
|
3
requirements_python2.txt
Normal file
3
requirements_python2.txt
Normal file
|
@ -0,0 +1,3 @@
|
|||
mock==2.0.0
|
||||
lxml==4.1.1
|
||||
beautifulsoup4==4.6.0
|
|
@ -5,25 +5,29 @@
|
|||
#
|
||||
# Python script for testing updateHostsFile.py
|
||||
|
||||
from updateHostsFile import (
|
||||
Colors, PY3, colorize, display_exclusion_options, exclude_domain,
|
||||
flush_dns_cache, gather_custom_exclusions, get_defaults, get_file_by_url,
|
||||
is_valid_domain_format, matches_exclusions, move_hosts_file_into_place,
|
||||
normalize_rule, path_join_robust, print_failure, print_success,
|
||||
prompt_for_exclusions, prompt_for_move, prompt_for_flush_dns_cache,
|
||||
prompt_for_update, query_yes_no, recursive_glob, remove_old_hosts_file,
|
||||
supports_color, strip_rule, update_all_sources, update_readme_data,
|
||||
update_sources_data, write_data, write_opening_header)
|
||||
|
||||
import updateHostsFile
|
||||
import unittest
|
||||
import tempfile
|
||||
import locale
|
||||
import shutil
|
||||
import json
|
||||
import sys
|
||||
import locale
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
import updateHostsFile
|
||||
from updateHostsFile import (PY3, Colors, colorize, display_exclusion_options,
|
||||
domain_to_idna, exclude_domain, flush_dns_cache,
|
||||
gather_custom_exclusions, get_defaults,
|
||||
get_file_by_url, is_valid_domain_format,
|
||||
matches_exclusions, move_hosts_file_into_place,
|
||||
normalize_rule, path_join_robust, print_failure,
|
||||
print_success, prompt_for_exclusions,
|
||||
prompt_for_flush_dns_cache, prompt_for_move,
|
||||
prompt_for_update, query_yes_no, recursive_glob,
|
||||
remove_old_hosts_file, strip_rule, supports_color,
|
||||
update_all_sources, update_readme_data,
|
||||
update_sources_data, write_data,
|
||||
write_opening_header)
|
||||
|
||||
if PY3:
|
||||
from io import BytesIO, StringIO
|
||||
|
@ -1360,6 +1364,81 @@ def mock_url_open_decode_fail(_):
|
|||
return m
|
||||
|
||||
|
||||
class DomainToIDNA(Base):
    """
    Tests for `domain_to_idna`.

    Each test feeds a hosts-file style line to `domain_to_idna` and checks
    that only the domain is converted to its `idna` form while the prefix,
    the separator and any trailing comment come back unchanged.
    """

    def __init__(self, *args, **kwargs):
        super(DomainToIDNA, self).__init__(*args, **kwargs)

        # UTF-8 encoded domains and, at the same index, the `idna` form
        # that `domain_to_idna` is expected to produce for them.
        self.domains = [b'\xc9\xa2oogle.com', b'www.huala\xc3\xb1e.cl']
        self.expected_domains = ['xn--oogle-wmc.com', 'www.xn--hualae-0wa.cl']

    def _assert_converted(self, prefix=b"", suffix=b""):
        """
        Check every fixture domain wrapped between `prefix` and `suffix`.

        Parameters
        ----------
        prefix : bytes
            What precedes the domain on the line (IP address + separator).
        suffix : bytes
            What follows the domain on the line (trailing comment).
        """

        for domain, expected_domain in zip(self.domains,
                                           self.expected_domains):
            data = (prefix + domain + suffix).decode('utf-8')
            expected = prefix.decode('utf-8') + expected_domain + \
                suffix.decode('utf-8')

            actual = domain_to_idna(data)

            self.assertEqual(actual, expected)

    def test_empty_line(self):
        # Empty lines and bare line endings must come back untouched.
        for empty in ["", "\r", "\n"]:
            expected = empty

            actual = domain_to_idna(empty)
            self.assertEqual(actual, expected)

    def test_commented_line(self):
        data = "# Hello World"
        expected = data
        actual = domain_to_idna(data)

        self.assertEqual(actual, expected)

    def test_simple_line(self):
        # Test with a space as separator.
        self._assert_converted(prefix=b"0.0.0.0 ")

        # Test with a tabulation as separator.
        self._assert_converted(prefix=b"0.0.0.0\t")

    def test_single_line_with_comment_at_the_end(self):
        # Test with a space as separator.
        self._assert_converted(prefix=b"0.0.0.0 ", suffix=b" # Hello World")

        # Test with a tabulation as separator.
        self._assert_converted(prefix=b"0.0.0.0\t", suffix=b" # Hello World")

    def test_single_line_without_prefix(self):
        self._assert_converted()
|
||||
|
||||
|
||||
class GetFileByUrl(BaseStdout):
|
||||
|
||||
@mock.patch("updateHostsFile.urlopen",
|
||||
|
|
|
@ -6,23 +6,26 @@
|
|||
# This Python script will combine all the host files you provide
|
||||
# as sources into one, unique host file to keep your internet browsing happy.
|
||||
|
||||
from __future__ import (absolute_import, division,
|
||||
print_function, unicode_literals)
|
||||
from glob import glob
|
||||
from __future__ import (absolute_import, division, print_function,
|
||||
unicode_literals)
|
||||
|
||||
import os
|
||||
import argparse
|
||||
import fnmatch
|
||||
import json
|
||||
import locale
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import shutil
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
import fnmatch
|
||||
import argparse
|
||||
import socket
|
||||
import json
|
||||
from glob import glob
|
||||
|
||||
import lxml # noqa: F401
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Detecting Python 3 for version-dependent implementations
|
||||
PY3 = sys.version_info >= (3, 0)
|
||||
|
@ -1126,6 +1129,60 @@ def remove_old_hosts_file(backup):
|
|||
# End File Logic
|
||||
|
||||
|
||||
def domain_to_idna(line):
    """
    Encode the domain which is present in a line into `idna`. This way we
    avoid most of the encoding issues.

    Parameters
    ----------
    line : str
        The line we have to convert, for example
        `0.0.0.0 example.com # comment`.

    Returns
    -------
    line : str
        The line with its domain converted to `idna`. The prefix, the
        separators and the trailing comment are returned as they were given.

    Raises
    ------
    UnicodeError
        If the `idna` codec rejects the domain (for example an empty or a
        too long label).

    Notes
    -----
    - Only the domain is encoded to `idna`, because in most cases the
      encoding issue is due to a domain containing non-ASCII characters.

    - About the splitting:
        Everything from the first `#` on is the trailing comment and is
        never given to the codec. What remains is cut on runs of
        tabulations and spaces; the separators are kept so that the line
        can be rebuilt exactly once the domain is encoded.

        - The second field is the domain when the line has a prefix such as
          `0.0.0.0` or `127.0.0.1`.
        - The only field is the domain when the line has no prefix.
    """

    if line.startswith('#'):
        # Fully commented line: nothing to convert. The round trip is kept
        # from the previous implementation; on Python 3 it leaves the text
        # unchanged.
        return line.encode("UTF-8").decode("UTF-8")

    # The comment must not reach the `idna` codec: it limits every label
    # to 63 characters and rewrites non-ASCII text.
    rule, hash_mark, comment = line.partition('#')

    # The capturing group keeps the separators: fields sit at the even
    # indexes, separators at the odd ones.
    pieces = re.split('([\t ]+)', rule)
    fields = [index for index in range(0, len(pieces), 2) if pieces[index]]

    if not fields:
        # Blank line or comment preceded by whitespace only.
        return line

    # Skipping the empty fields makes repeated or mixed separators between
    # the prefix and the domain harmless.
    domain_index = fields[1] if len(fields) > 1 else fields[0]
    pieces[domain_index] = pieces[domain_index] \
        .encode("IDNA") \
        .decode("UTF-8")

    return ''.join(pieces) + hash_mark + comment
|
||||
|
||||
|
||||
# Helper Functions
|
||||
def get_file_by_url(url):
|
||||
"""
|
||||
|
@ -1141,11 +1198,17 @@ def get_file_by_url(url):
|
|||
url_data : str or None
|
||||
The data retrieved at that URL from the file. Returns None if the
|
||||
attempted retrieval is unsuccessful.
|
||||
|
||||
Note
|
||||
----
|
||||
- BeautifulSoup is used in this case to avoid having to search in which
|
||||
format we have to encode or decode data before parsing it to UTF-8.
|
||||
"""
|
||||
|
||||
try:
|
||||
f = urlopen(url)
|
||||
return f.read().decode("UTF-8")
|
||||
soup = BeautifulSoup(f.read(), 'lxml').get_text()
|
||||
return '\n'.join(list(map(domain_to_idna, soup.split('\n'))))
|
||||
except Exception:
|
||||
print("Problem getting file: ", url)
|
||||
|
||||
|
@ -1165,7 +1228,10 @@ def write_data(f, data):
|
|||
if PY3:
|
||||
f.write(bytes(data, "UTF-8"))
|
||||
else:
|
||||
f.write(str(data).encode("UTF-8"))
|
||||
try:
|
||||
f.write(str(data))
|
||||
except UnicodeEncodeError:
|
||||
f.write(str(data.encode("UTF-8")))
|
||||
|
||||
|
||||
def list_dir_no_hidden(path):
|
||||
|
|
Loading…
Reference in New Issue
Block a user