mirror of
https://github.com/StevenBlack/hosts.git
synced 2024-07-12 07:12:52 +02:00
90ad37d6fc
Update to include preamble handling
378 lines
12 KiB
Python
Executable File
378 lines
12 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
# Script by Ben Limmer
|
|
# https://github.com/l1m5
|
|
#
|
|
# This simple Python script will combine all the host files you provide
|
|
# as sources into one, unique host file to keep you internet browsing happy.
|
|
|
|
# Making Python 2 compatible with Python 3
|
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
|
|
import os
|
|
import platform
|
|
import re
|
|
import string
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
# zip files are not used actually, support deleted
|
|
# StringIO is not needed in Python 3
|
|
# Python 3 works differently with urlopen
|
|
|
|
# Supporting urlopen in Python 2 and Python 3
|
|
try:
|
|
from urllib.parse import urlparse, urlencode
|
|
from urllib.request import urlopen, Request
|
|
from urllib.error import HTTPError
|
|
except ImportError:
|
|
from urlparse import urlparse
|
|
from urllib import urlencode
|
|
from urllib2 import urlopen, Request, HTTPError
|
|
|
|
# This function handles both Python 2 and Python 3
|
|
def getFileByUrl(url):
|
|
try:
|
|
f = urlopen(url)
|
|
return f.read().decode("UTF-8")
|
|
except:
|
|
print ("Problem getting file: ", url);
|
|
# raise
|
|
|
|
|
|
# In Python 3 "print" is a function, braces are added everywhere
|
|
|
|
# Detecting Python 3 for version-dependent implementations
|
|
Python3=False;
|
|
cur_version = sys.version_info
|
|
if cur_version >= (3, 0):
|
|
Python3=True;
|
|
|
|
# This function works in both Python 2 and Python 3
|
|
def myInput(msg=""):
|
|
if Python3:
|
|
return input(msg);
|
|
else:
|
|
return raw_input(msg);
|
|
|
|
|
|
# Cross-python writing function
|
|
def writeData(f, data):
|
|
if Python3:
|
|
f.write(bytes(data, 'UTF-8'))
|
|
else:
|
|
f.write(str(data).encode('UTF-8'))
|
|
|
|
|
|
# Project Settings
|
|
BASEDIR_PATH = os.path.dirname(os.path.realpath(__file__))
|
|
DATA_PATH = os.path.join(BASEDIR_PATH, 'data')
|
|
DATA_FILENAMES = 'hosts'
|
|
UPDATE_URL_FILENAME = 'update.info'
|
|
SOURCES = os.listdir(DATA_PATH)
|
|
README_TEMPLATE = os.path.join(BASEDIR_PATH, 'readme_template.md')
|
|
README_FILE = os.path.join(BASEDIR_PATH, 'readme.md')
|
|
TARGET_HOST = '0.0.0.0'
|
|
|
|
# Exclusions
|
|
EXCLUSION_PATTERN = '([a-zA-Z\d-]+\.){0,}' #append domain the end
|
|
|
|
# Common domains to exclude
|
|
COMMON_EXCLUSIONS = ['hulu.com']
|
|
|
|
# Global vars
|
|
exclusionRegexs = []
|
|
numberOfRules = 0
|
|
|
|
def main():
|
|
promptForUpdate()
|
|
promptForExclusions()
|
|
mergeFile = createInitialFile()
|
|
finalFile = removeDups(mergeFile)
|
|
finalizeFile(finalFile)
|
|
updateReadme(numberOfRules)
|
|
printSuccess('Success! Your shiny new hosts file has been prepared.\nIt contains ' + "{:,}".format( numberOfRules ) + ' unique entries.')
|
|
|
|
promptForMove(finalFile)
|
|
|
|
# Prompt the User
|
|
def promptForUpdate():
|
|
response = query_yes_no("Do you want to update all data sources?")
|
|
if (response == "yes"):
|
|
updateAllSources()
|
|
else:
|
|
print ('OK, we\'ll stick with what we\'ve got locally.')
|
|
|
|
def promptForExclusions():
|
|
response = query_yes_no("Do you want to exclude any domains?\n" +
|
|
"For example, hulu.com video streaming must be able to access " +
|
|
"its tracking and ad servers in order to play video.")
|
|
if (response == "yes"):
|
|
displayExclusionOptions()
|
|
else:
|
|
print ('OK, we won\'t exclude any domains.')
|
|
|
|
def promptForMoreCustomExclusions():
|
|
response = query_yes_no("Do you have more domains you want to enter?")
|
|
if (response == "yes"):
|
|
return True
|
|
else:
|
|
return False
|
|
|
|
def promptForMove(finalFile):
|
|
response = query_yes_no("Do you want to replace your existing hosts file with the newly generated file?")
|
|
if (response == "yes"):
|
|
moveHostsFileIntoPlace(finalFile)
|
|
else:
|
|
return False
|
|
# End Prompt the User
|
|
|
|
# Exclusion logic
|
|
def displayExclusionOptions():
|
|
for exclusionOption in COMMON_EXCLUSIONS:
|
|
response = query_yes_no("Do you want to exclude the domain " + exclusionOption + " ?")
|
|
if (response == "yes"):
|
|
excludeDomain(exclusionOption)
|
|
else:
|
|
continue
|
|
response = query_yes_no("Do you want to exclude any other domains?")
|
|
if (response == "yes"):
|
|
gatherCustomExclusions()
|
|
|
|
def gatherCustomExclusions():
|
|
while True:
|
|
# Cross-python Input
|
|
domainFromUser = myInput("Enter the domain you want to exclude (e.g. facebook.com): ")
|
|
if (isValidDomainFormat(domainFromUser)):
|
|
excludeDomain(domainFromUser)
|
|
if (promptForMoreCustomExclusions() == False):
|
|
return
|
|
|
|
def excludeDomain(domain):
|
|
exclusionRegexs.append(re.compile(EXCLUSION_PATTERN + domain))
|
|
|
|
def matchesExclusions(strippedRule):
|
|
strippedDomain = strippedRule.split()[1]
|
|
for exclusionRegex in exclusionRegexs:
|
|
if exclusionRegex.search(strippedDomain):
|
|
return True
|
|
return False
|
|
# End Exclusion Logic
|
|
|
|
# Update Logic
|
|
def updateAllSources():
|
|
for source in SOURCES:
|
|
updateURL = getUpdateURLFromFile(source)
|
|
if (updateURL == None):
|
|
continue;
|
|
print ('Updating source ' + source + ' from ' + updateURL)
|
|
# Cross-python call
|
|
updatedFile = getFileByUrl(updateURL);
|
|
updatedFile = updatedFile.replace('\r', '') #get rid of carriage-return symbols
|
|
|
|
# This is cross-python code
|
|
dataFile = open(os.path.join(DATA_PATH, source, DATA_FILENAMES), 'wb')
|
|
writeData(dataFile, updatedFile);
|
|
dataFile.close()
|
|
|
|
def getUpdateURLFromFile(source):
|
|
pathToUpdateFile = os.path.join(DATA_PATH, source, UPDATE_URL_FILENAME)
|
|
if os.path.exists(pathToUpdateFile):
|
|
updateFile = open(pathToUpdateFile, 'r')
|
|
retURL = updateFile.readline().strip()
|
|
updateFile.close()
|
|
else:
|
|
retURL = None
|
|
printFailure('Warning: Can\'t find the update file for source ' + source + '\n' +
|
|
'Make sure that there\'s a file at ' + pathToUpdateFile)
|
|
return retURL
|
|
# End Update Logic
|
|
|
|
# File Logic
|
|
def createInitialFile():
|
|
mergeFile = tempfile.NamedTemporaryFile()
|
|
for source in SOURCES:
|
|
curFile = open(os.path.join(DATA_PATH, source, DATA_FILENAMES), 'r')
|
|
#Done in a cross-python way
|
|
writeData(mergeFile, '\n# Begin ' + source + '\n')
|
|
writeData(mergeFile, curFile.read())
|
|
writeData(mergeFile, '\n# End ' + source + '\n')
|
|
|
|
return mergeFile
|
|
|
|
def removeDups(mergeFile):
|
|
global numberOfRules
|
|
|
|
# Another mode is required to read and write the file in Python 3
|
|
finalFile = open(os.path.join(BASEDIR_PATH, 'hosts'), 'r+b')
|
|
mergeFile.seek(0) # reset file pointer
|
|
|
|
hostnames = set()
|
|
hostnames.add("localhost")
|
|
for line in mergeFile.readlines():
|
|
# Explicit encoding
|
|
line = line.decode("UTF-8")
|
|
# Testing the first character doesn't require startswith
|
|
if line[0] == '#' or re.match(r'^\s*$', line[0]):
|
|
# Cross-python write
|
|
writeData(finalFile, line)
|
|
continue
|
|
|
|
strippedRule = stripRule(line) #strip comments
|
|
if matchesExclusions(strippedRule):
|
|
continue
|
|
hostname, normalizedRule = normalizeRule(strippedRule) # normalize rule
|
|
|
|
if normalizedRule and (hostname not in hostnames):
|
|
writeData(finalFile, normalizedRule)
|
|
hostnames.add(hostname)
|
|
numberOfRules += 1
|
|
|
|
mergeFile.close()
|
|
|
|
return finalFile
|
|
|
|
def normalizeRule(rule):
|
|
result = re.search(r'^[ \t]*(\d+\.\d+\.\d+\.\d+)\s+([\w\.-]+)(.*)',rule)
|
|
if result:
|
|
target, hostname, suffix = result.groups()
|
|
hostname = hostname.lower() # explicitly lowercase hostname
|
|
if suffix is not '':
|
|
# add suffix as comment only, not as a separate host
|
|
return hostname, "%s %s #%s\n" % (TARGET_HOST, hostname, suffix)
|
|
else:
|
|
return hostname, "%s %s\n" % (TARGET_HOST, hostname)
|
|
print ('==>%s<==' % rule)
|
|
return None, None
|
|
|
|
def finalizeFile(finalFile):
|
|
writeOpeningHeader(finalFile)
|
|
finalFile.close()
|
|
|
|
# Some sources put comments around their rules, for accuracy we need to strip them
|
|
# the comments are preserved in the output hosts file
|
|
def stripRule(line):
|
|
splitLine = line.split()
|
|
if (len(splitLine) < 2) :
|
|
# This is due to the diffrences between bytes and string type in Python 3
|
|
printFailure('A line in the hostfile is going to cause problems because it is nonstandard\n' +
|
|
'The line reads ' + str(line) + ' please check your data files. Maybe you have a comment without a #?')
|
|
sys.exit()
|
|
return splitLine[0] + ' ' + splitLine[1]
|
|
|
|
def writeOpeningHeader(finalFile):
|
|
global numberOfRules
|
|
finalFile.seek(0) #reset file pointer
|
|
fileContents = finalFile.read(); #save content
|
|
finalFile.seek(0) #write at the top
|
|
writeData(finalFile, '# This file is a merged collection of hosts from reputable sources,\n')
|
|
writeData(finalFile, '# with a dash of crowd sourcing via Github\n#\n')
|
|
writeData(finalFile, '# Project home page: https://github.com/StevenBlack/hosts\n#\n')
|
|
writeData(finalFile, '# Current sources:\n')
|
|
for source in SOURCES:
|
|
writeData(finalFile, '# ' + source + '\n')
|
|
writeData(finalFile, '#\n')
|
|
writeData(finalFile, '# Merging these sources produced ' + "{:,}".format( numberOfRules ) + ' unique entries\n')
|
|
writeData(finalFile, '# ===============================================================\n')
|
|
writeData(finalFile, '\n')
|
|
writeData(finalFile, '127.0.0.1 localhost\n')
|
|
writeData(finalFile, '\n')
|
|
|
|
preamble = os.path.join(BASEDIR_PATH, "myhosts");
|
|
if os.path.isfile(preamble):
|
|
with open(preamble, "r") as f:
|
|
writeData(finalFile, f.read());
|
|
|
|
finalFile.write(fileContents)
|
|
|
|
def updateReadme(numberOfRules):
|
|
with open(README_FILE, "wt") as out:
|
|
for line in open(README_TEMPLATE):
|
|
out.write(line.replace('@NUM_ENTRIES@', "{:,}".format( numberOfRules )))
|
|
|
|
def moveHostsFileIntoPlace(finalFile):
|
|
if (os.name == 'posix'):
|
|
print ('Moving the file requires administrative privileges. You might need to enter your password.')
|
|
if(subprocess.call(["/usr/bin/sudo", "cp", os.path.abspath(finalFile.name), "/etc/hosts"])):
|
|
printFailure("Moving the file failed.")
|
|
print ('Flushing the DNS Cache to utilize new hosts file...')
|
|
if (platform.system() == 'Darwin'):
|
|
if(subprocess.call(["/usr/bin/sudo", "killall", "-HUP", "mDNSResponder"])):
|
|
printFailure("Flushing the DNS Cache failed.")
|
|
else:
|
|
if os.path.isfile("/etc/rc.d/init.d/nscd"):
|
|
if(subprocess.call(["/usr/bin/sudo", "/etc/rc.d/init.d/nscd", "restart"])):
|
|
printFailure("Flushing the DNS Cache failed.")
|
|
elif (os.name == 'nt'):
|
|
print ('Automatically moving the hosts file in place is not yet supported.')
|
|
print ('Please move the generated file to %SystemRoot%\system32\drivers\etc\hosts')
|
|
|
|
# End File Logic
|
|
|
|
# Helper Functions
|
|
## {{{ http://code.activestate.com/recipes/577058/ (r2)
|
|
def query_yes_no(question, default="yes"):
|
|
"""Ask a yes/no question via raw_input() and return their answer.
|
|
|
|
"question" is a string that is presented to the user.
|
|
"default" is the presumed answer if the user just hits <Enter>.
|
|
It must be "yes" (the default), "no" or None (meaning
|
|
an answer is required of the user).
|
|
|
|
The "answer" return value is one of "yes" or "no".
|
|
"""
|
|
valid = {"yes":"yes", "y":"yes", "ye":"yes",
|
|
"no":"no", "n":"no"}
|
|
if default == None:
|
|
prompt = " [y/n] "
|
|
elif default == "yes":
|
|
prompt = " [Y/n] "
|
|
elif default == "no":
|
|
prompt = " [y/N] "
|
|
else:
|
|
raise ValueError("invalid default answer: '%s'" % default)
|
|
|
|
while 1:
|
|
sys.stdout.write(colorize(question, colors.PROMPT) + prompt)
|
|
# Changed to be cross-python
|
|
choice = myInput().lower()
|
|
if default is not None and choice == '':
|
|
return default
|
|
elif choice in valid.keys():
|
|
return valid[choice]
|
|
else:
|
|
printFailure("Please respond with 'yes' or 'no' "\
|
|
"(or 'y' or 'n').\n")
|
|
## end of http://code.activestate.com/recipes/577058/ }}}
|
|
|
|
def isValidDomainFormat(domain):
|
|
if (domain == ''):
|
|
print ("You didn\'t enter a domain. Try again.")
|
|
return False
|
|
domainRegex = re.compile("www\d{0,3}[.]|https?")
|
|
if (domainRegex.match(domain)):
|
|
print ("The domain " + domain + " is not valid. Do not include www.domain.com or http(s)://domain.com. Try again.")
|
|
return False
|
|
else:
|
|
return True
|
|
|
|
# Colors
|
|
class colors:
|
|
PROMPT = '\033[94m'
|
|
SUCCESS = '\033[92m'
|
|
FAIL = '\033[91m'
|
|
ENDC = '\033[0m'
|
|
|
|
def colorize(text, color):
|
|
return color + text + colors.ENDC
|
|
|
|
def printSuccess(text):
|
|
print (colorize(text, colors.SUCCESS))
|
|
|
|
def printFailure(text):
|
|
print (colorize(text, colors.FAIL))
|
|
# End Helper Functions
|
|
|
|
if __name__ == "__main__":
|
|
main()
|