mirror of
https://github.com/StevenBlack/hosts.git
synced 2024-07-07 13:02:27 +02:00
Merge pull request #520 from funilrys/notDavid-protocol
Possible fix of the encoding and/or download issue(s)
This commit is contained in:
commit
aa6b09561b
@ -5,4 +5,4 @@ conda create -n hosts python=$PYTHON_VERSION || exit 1
|
|||||||
source activate hosts
|
source activate hosts
|
||||||
|
|
||||||
echo "Installing packages..."
|
echo "Installing packages..."
|
||||||
conda install mock flake8
|
conda install mock flake8 beautifulsoup4 lxml
|
||||||
|
@ -39,14 +39,20 @@ folders.
|
|||||||
|
|
||||||
## Generate your own unified hosts file
|
## Generate your own unified hosts file
|
||||||
|
|
||||||
|
**Note** if you are using Python 3, please install the dependencies with:
|
||||||
|
|
||||||
|
pip3 install --user -r requirements.txt
|
||||||
|
|
||||||
|
**Note** if you are using Python 2, please install the dependencies with:
|
||||||
|
|
||||||
|
pip2 install --user -r requirements_python2.txt
|
||||||
|
|
||||||
|
**Note** we recommend the `--user` flag, which installs the required dependencies at the user level. More information about it can be found in the pip [documentation](https://pip.pypa.io/en/stable/reference/pip_install/?highlight=--user#cmdoption-user).
|
||||||
|
|
||||||
To run unit tests, in the top level directory, just run:
|
To run unit tests, in the top level directory, just run:
|
||||||
|
|
||||||
python testUpdateHostsFile.py
|
python testUpdateHostsFile.py
|
||||||
|
|
||||||
**Note** if you are using Python 2, you must first install the `mock` library:
|
|
||||||
|
|
||||||
pip install mock
|
|
||||||
|
|
||||||
The `updateHostsFile.py` script, which is Python 2.7 and Python 3-compatible,
|
The `updateHostsFile.py` script, which is Python 2.7 and Python 3-compatible,
|
||||||
will generate a unified hosts file based on the sources in the local `data/`
|
will generate a unified hosts file based on the sources in the local `data/`
|
||||||
subfolder. The script will prompt you whether it should fetch updated
|
subfolder. The script will prompt you whether it should fetch updated
|
||||||
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
lxml==4.1.1
|
||||||
|
beautifulsoup4==4.6.0
|
||||||
|
mock==2.0.0
|
3
requirements_python2.txt
Normal file
3
requirements_python2.txt
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
mock==2.0.0
|
||||||
|
lxml==4.1.1
|
||||||
|
beautifulsoup4==4.6.0
|
@ -5,25 +5,29 @@
|
|||||||
#
|
#
|
||||||
# Python script for testing updateHostsFile.py
|
# Python script for testing updateHostsFile.py
|
||||||
|
|
||||||
from updateHostsFile import (
|
|
||||||
Colors, PY3, colorize, display_exclusion_options, exclude_domain,
|
|
||||||
flush_dns_cache, gather_custom_exclusions, get_defaults, get_file_by_url,
|
|
||||||
is_valid_domain_format, matches_exclusions, move_hosts_file_into_place,
|
|
||||||
normalize_rule, path_join_robust, print_failure, print_success,
|
|
||||||
prompt_for_exclusions, prompt_for_move, prompt_for_flush_dns_cache,
|
|
||||||
prompt_for_update, query_yes_no, recursive_glob, remove_old_hosts_file,
|
|
||||||
supports_color, strip_rule, update_all_sources, update_readme_data,
|
|
||||||
update_sources_data, write_data, write_opening_header)
|
|
||||||
|
|
||||||
import updateHostsFile
|
|
||||||
import unittest
|
|
||||||
import tempfile
|
|
||||||
import locale
|
|
||||||
import shutil
|
|
||||||
import json
|
import json
|
||||||
import sys
|
import locale
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import shutil
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
import updateHostsFile
|
||||||
|
from updateHostsFile import (PY3, Colors, colorize, display_exclusion_options,
|
||||||
|
domain_to_idna, exclude_domain, flush_dns_cache,
|
||||||
|
gather_custom_exclusions, get_defaults,
|
||||||
|
get_file_by_url, is_valid_domain_format,
|
||||||
|
matches_exclusions, move_hosts_file_into_place,
|
||||||
|
normalize_rule, path_join_robust, print_failure,
|
||||||
|
print_success, prompt_for_exclusions,
|
||||||
|
prompt_for_flush_dns_cache, prompt_for_move,
|
||||||
|
prompt_for_update, query_yes_no, recursive_glob,
|
||||||
|
remove_old_hosts_file, strip_rule, supports_color,
|
||||||
|
update_all_sources, update_readme_data,
|
||||||
|
update_sources_data, write_data,
|
||||||
|
write_opening_header)
|
||||||
|
|
||||||
if PY3:
|
if PY3:
|
||||||
from io import BytesIO, StringIO
|
from io import BytesIO, StringIO
|
||||||
@ -1360,6 +1364,81 @@ def mock_url_open_decode_fail(_):
|
|||||||
return m
|
return m
|
||||||
|
|
||||||
|
|
||||||
|
class DomainToIDNA(Base):
    """Unit tests for `domain_to_idna`."""

    def __init__(self, *args, **kwargs):
        super(DomainToIDNA, self).__init__(*args, **kwargs)

        # UTF-8 encoded domains and, at the same index, the IDNA form that
        # `domain_to_idna` is expected to produce for each of them.
        self.domains = [b'\xc9\xa2oogle.com', b'www.huala\xc3\xb1e.cl']
        self.expected_domains = ['xn--oogle-wmc.com', 'www.xn--hualae-0wa.cl']

    def _domain_pairs(self):
        """Return the (raw domain, expected IDNA domain) pairs, in order."""
        return list(zip(self.domains, self.expected_domains))

    def test_empty_line(self):
        # Empty lines and bare line endings must come back untouched.
        for blank in ["", "\r", "\n"]:
            self.assertEqual(domain_to_idna(blank), blank)

    def test_commented_line(self):
        # A line that starts with a comment marker is returned as given.
        commented = "# Hello World"

        self.assertEqual(domain_to_idna(commented), commented)

    def test_simple_line(self):
        # Test with a space as separator.
        for raw_domain, idna_domain in self._domain_pairs():
            given = (b"0.0.0.0 " + raw_domain).decode('utf-8')
            wanted = "0.0.0.0 " + idna_domain

            self.assertEqual(domain_to_idna(given), wanted)

        # Test with a tabulation as separator.
        for raw_domain, idna_domain in self._domain_pairs():
            given = (b"0.0.0.0\t" + raw_domain).decode('utf-8')
            wanted = "0.0.0.0\t" + idna_domain

            self.assertEqual(domain_to_idna(given), wanted)

    def test_single_line_with_comment_at_the_end(self):
        # Test with a space as separator.
        for raw_domain, idna_domain in self._domain_pairs():
            raw_line = b"0.0.0.0 " + raw_domain + b" # Hello World"
            given = raw_line.decode('utf-8')
            wanted = "0.0.0.0 " + idna_domain + " # Hello World"

            self.assertEqual(domain_to_idna(given), wanted)

        # Test with a tabulation as separator.
        for raw_domain, idna_domain in self._domain_pairs():
            raw_line = b"0.0.0.0\t" + raw_domain + b" # Hello World"
            given = raw_line.decode('utf-8')
            wanted = "0.0.0.0\t" + idna_domain + " # Hello World"

            self.assertEqual(domain_to_idna(given), wanted)

    def test_single_line_without_prefix(self):
        # A bare domain, with no IP prefix, is converted as a whole.
        for raw_domain, idna_domain in self._domain_pairs():
            given = raw_domain.decode('utf-8')

            self.assertEqual(domain_to_idna(given), idna_domain)
|
||||||
|
|
||||||
|
|
||||||
class GetFileByUrl(BaseStdout):
|
class GetFileByUrl(BaseStdout):
|
||||||
|
|
||||||
@mock.patch("updateHostsFile.urlopen",
|
@mock.patch("updateHostsFile.urlopen",
|
||||||
|
@ -6,23 +6,26 @@
|
|||||||
# This Python script will combine all the host files you provide
|
# This Python script will combine all the host files you provide
|
||||||
# as sources into one, unique host file to keep your internet browsing happy.
|
# as sources into one, unique host file to keep your internet browsing happy.
|
||||||
|
|
||||||
from __future__ import (absolute_import, division,
|
from __future__ import (absolute_import, division, print_function,
|
||||||
print_function, unicode_literals)
|
unicode_literals)
|
||||||
from glob import glob
|
|
||||||
|
|
||||||
import os
|
import argparse
|
||||||
|
import fnmatch
|
||||||
|
import json
|
||||||
import locale
|
import locale
|
||||||
|
import os
|
||||||
import platform
|
import platform
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
|
import socket
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
import fnmatch
|
from glob import glob
|
||||||
import argparse
|
|
||||||
import socket
|
import lxml # noqa: F401
|
||||||
import json
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
# Detecting Python 3 for version-dependent implementations
|
# Detecting Python 3 for version-dependent implementations
|
||||||
PY3 = sys.version_info >= (3, 0)
|
PY3 = sys.version_info >= (3, 0)
|
||||||
@ -1126,6 +1129,60 @@ def remove_old_hosts_file(backup):
|
|||||||
# End File Logic
|
# End File Logic
|
||||||
|
|
||||||
|
|
||||||
|
def domain_to_idna(line):
    """
    Encode the domain present in a hosts-file line into its `idna` form.

    Only the domain is converted; the IP prefix, the whitespace layout and
    any trailing comment are returned exactly as they were given.

    Parameters
    ----------
    line : str
        The line we have to convert.

    Returns
    -------
    line : str
        The line with its domain converted to `idna`.

    Raises
    ------
    UnicodeError
        If the domain itself cannot be encoded to `idna` (for example an
        empty label or a label of 64 characters or more).

    Notes
    -----
    - A line which starts with `#` is returned without any conversion.
    - Everything from the first `#` onward is a trailing comment. It is set
      aside before the conversion and appended back untouched, so that its
      content can never make the `idna` codec fail nor be altered by it.
    - What remains is split on runs of spaces and tabulations. When there
      are at least two fields (e.g. the prefix `0.0.0.0` or `127.0.0.1`
      followed by a domain) the second one is converted, otherwise the
      only field is converted.
    """

    if line.startswith('#'):
        # Whole-line comment: there is no domain to convert.
        return line.encode("UTF-8").decode("UTF-8")

    comment_start = line.find('#')

    if comment_start > -1:
        rule, comment = line[:comment_start], line[comment_start:]
    else:
        rule, comment = line, ''

    # The capturing group keeps the separators in the result: fields sit at
    # the even positions and the whitespace runs at the odd ones, so joining
    # the list back restores the original layout.
    fields = re.split(r'([ \t]+)', rule)
    filled_positions = [
        position
        for position in range(0, len(fields), 2)
        if fields[position]
    ]

    if filled_positions:
        # Second field when a prefix is present, the only field otherwise.
        if len(filled_positions) > 1:
            target = filled_positions[1]
        else:
            target = filled_positions[0]

        fields[target] = fields[target].encode("IDNA").decode("UTF-8")

    return ''.join(fields) + comment
|
||||||
|
|
||||||
|
|
||||||
# Helper Functions
|
# Helper Functions
|
||||||
def get_file_by_url(url):
|
def get_file_by_url(url):
|
||||||
"""
|
"""
|
||||||
@ -1141,11 +1198,17 @@ def get_file_by_url(url):
|
|||||||
url_data : str or None
|
url_data : str or None
|
||||||
The data retrieved at that URL from the file. Returns None if the
|
The data retrieved at that URL from the file. Returns None if the
|
||||||
attempted retrieval is unsuccessful.
|
attempted retrieval is unsuccessful.
|
||||||
|
|
||||||
|
Note
|
||||||
|
----
|
||||||
|
- BeautifulSoup is used in this case to avoid having to search in which
|
||||||
|
format we have to encode or decode data before parsing it to UTF-8.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
f = urlopen(url)
|
f = urlopen(url)
|
||||||
return f.read().decode("UTF-8")
|
soup = BeautifulSoup(f.read(), 'lxml').get_text()
|
||||||
|
return '\n'.join(list(map(domain_to_idna, soup.split('\n'))))
|
||||||
except Exception:
|
except Exception:
|
||||||
print("Problem getting file: ", url)
|
print("Problem getting file: ", url)
|
||||||
|
|
||||||
@ -1165,7 +1228,10 @@ def write_data(f, data):
|
|||||||
if PY3:
|
if PY3:
|
||||||
f.write(bytes(data, "UTF-8"))
|
f.write(bytes(data, "UTF-8"))
|
||||||
else:
|
else:
|
||||||
f.write(str(data).encode("UTF-8"))
|
try:
|
||||||
|
f.write(str(data))
|
||||||
|
except UnicodeEncodeError:
|
||||||
|
f.write(str(data.encode("UTF-8")))
|
||||||
|
|
||||||
|
|
||||||
def list_dir_no_hidden(path):
|
def list_dir_no_hidden(path):
|
||||||
|
Loading…
Reference in New Issue
Block a user