hosts/updateHostsFile.py

400 lines
12 KiB
Python
Raw Normal View History

2015-10-26 23:46:48 +01:00
#!/usr/bin/env python
# Script by Ben Limmer
# https://github.com/l1m5
#
# This simple Python script will combine all the hosts files you provide
# as sources into one unique hosts file to keep your internet browsing happy.
2015-10-26 23:46:48 +01:00
# Making Python 2 compatible with Python 3
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import platform
import re
import string
import subprocess
import sys
import tempfile
2015-11-05 13:38:08 +01:00
import glob
2015-10-26 23:16:55 +01:00
# zip files are not used actually, support deleted
# StringIO is not needed in Python 3
# Python 3 works differently with urlopen
2015-10-26 23:46:48 +01:00
# Supporting urlopen in Python 2 and Python 3
try:
from urllib.parse import urlparse, urlencode
from urllib.request import urlopen, Request
from urllib.error import HTTPError
except ImportError:
from urlparse import urlparse
from urllib import urlencode
from urllib2 import urlopen, Request, HTTPError
# This function handles both Python 2 and Python 3
def getFileByUrl(url):
    """Download *url* and return its body decoded as UTF-8 text.

    Returns None (after printing a diagnostic) when the download or the
    decoding fails, so callers must check the result before using it.
    """
    try:
        response = urlopen(url)
        try:
            return response.read().decode("UTF-8")
        finally:
            # Release the connection even if reading or decoding fails.
            response.close()
    except Exception:
        # Best effort: report the failing URL and let the caller decide.
        # Catching Exception instead of using a bare except keeps Ctrl-C
        # (KeyboardInterrupt) and SystemExit working during a download.
        print ("Problem getting file: ", url)
        return None
2015-10-26 23:46:48 +01:00
2015-10-26 23:16:55 +01:00
# In Python 3 "print" is a function, braces are added everywhere
2015-10-26 23:46:48 +01:00
# Detecting Python 3 for version-dependent implementations
# Python3 is True when the running interpreter is version 3.0 or newer;
# the version-dependent helpers in this file branch on it.
cur_version = sys.version_info
Python3 = cur_version >= (3, 0)
2015-10-29 00:33:16 +01:00
2015-10-26 23:46:48 +01:00
# This function works in both Python 2 and Python 3
def myInput(msg=""):
    """Prompt with *msg* and return the line typed by the user.

    Uses input() when the Python3 flag is set and raw_input() otherwise.
    """
    # The conditional expression only evaluates the selected name, so
    # raw_input is never looked up when running under Python 3.
    reader = input if Python3 else raw_input
    return reader(msg)
# Cross-python writing function
def writeData(f, data):
    """Write *data* to the binary file object *f* as UTF-8 encoded bytes.

    Text is encoded as UTF-8; data that is already a byte string is written
    unchanged. Checking the type of *data* (rather than the interpreter
    version) avoids the implicit ASCII round-trip that breaks on non-ASCII
    text under Python 2 and also accepts bytes under Python 3.
    """
    if not isinstance(data, bytes):
        data = data.encode('UTF-8')
    f.write(data)
2015-10-29 00:33:16 +01:00
2015-11-05 13:38:08 +01:00
# This function doesn't list hidden files
def listdir_nohidden(path):
    """Return the paths inside *path* that match the '*' glob pattern.

    Each returned entry is *path* joined with the entry name. Names that
    start with a dot are not matched by '*', so hidden files are left out.
    """
    visiblePattern = os.path.join(path, '*')
    return glob.glob(visiblePattern)
2015-10-26 23:46:48 +01:00
# Project Settings

# Directory that contains this script; the paths below are built from it.
BASEDIR_PATH = os.path.dirname(os.path.realpath(__file__))

# Every entry found in the data directory is treated as one source.
# Inside a source, DATA_FILENAMES names the hosts data file and
# UPDATE_URL_FILENAME names the file holding the download URL.
DATA_PATH = os.path.join(BASEDIR_PATH, 'data')
DATA_FILENAMES = 'hosts'
UPDATE_URL_FILENAME = 'update.info'
SOURCES = listdir_nohidden(DATA_PATH)

# The readme is regenerated from the template on each run.
README_TEMPLATE = os.path.join(BASEDIR_PATH, 'readme_template.md')
README_FILE = os.path.join(BASEDIR_PATH, 'readme.md')

# Address that every normalized rule points its hostname at.
TARGET_HOST = '0.0.0.0'

# Optional file listing entries that must not be blocked.
WHITELIST_FILE = os.path.join(BASEDIR_PATH, 'whitelist')
# Exclusions
# Regex prefix matching zero or more "label." subdomain parts; the domain to
# exclude is appended at the end. Written as a raw string so that \d and \.
# reach the regex engine without relying on invalid string escapes.
EXCLUSION_PATTERN = r'([a-zA-Z\d-]+\.){0,}'
# Whitelist entries, filled in while the merged file is filtered
EXCLUSIONS = []

# Common domains to exclude
COMMON_EXCLUSIONS = ['hulu.com']

# Global vars
# Compiled exclusion regexes chosen interactively by the user
exclusionRegexs = []
# Number of unique rules written to the generated hosts file
numberOfRules = 0
def main():
    """Run the interactive workflow from source update to installation.

    Steps, in order: optionally refresh the sources, collect exclusions,
    merge all sources, reset the old hosts file, filter the merged data,
    write the header, refresh the readme, report the rule count and offer
    to install the generated file.
    """
    promptForUpdate()
    promptForExclusions()

    mergedSources = createInitialFile()
    removeOldHostsFile()
    generatedHosts = removeDupsAndExcl(mergedSources)
    finalizeFile(generatedHosts)

    # numberOfRules is the module-level counter updated while filtering.
    updateReadme(numberOfRules)
    summary = ('Success! Your shiny new hosts file has been prepared.\n'
               'It contains {:,} unique entries.').format(numberOfRules)
    printSuccess(summary)

    promptForMove(generatedHosts)
# Prompt the User
def promptForUpdate():
    """Make sure a local 'hosts' file exists, then offer to update sources.

    If the file is missing and cannot be created, a failure message is
    printed and the prompt still follows. Answering "yes" downloads every
    data source again; any other answer keeps the local copies.
    """
    hostsPath = os.path.join(BASEDIR_PATH, 'hosts')

    # Create hosts file if it doesn't exist
    if not os.path.isfile(hostsPath):
        try:
            open(hostsPath, 'w+').close()
        except (IOError, OSError):
            # Only file-system errors are expected here; anything else
            # (including Ctrl-C) should propagate instead of being hidden.
            printFailure("ERROR: No 'hosts' file in the folder, try creating one manually")

    response = query_yes_no("Do you want to update all data sources?")
    if response == "yes":
        updateAllSources()
    else:
        print ('OK, we\'ll stick with what we\'ve got locally.')
def promptForExclusions():
    """Ask whether any domains should be excluded and, if so, collect them."""
    question = ("Do you want to exclude any domains?\n"
                "For example, hulu.com video streaming must be able to access "
                "its tracking and ad servers in order to play video.")
    if query_yes_no(question) != "yes":
        print ('OK, we\'ll only exclude domains in the whitelist.')
        return
    displayExclusionOptions()
def promptForMoreCustomExclusions():
    """Return True when the user wants to enter another domain, else False."""
    answer = query_yes_no("Do you have more domains you want to enter?")
    return answer == "yes"
def promptForMove(finalFile):
    """Offer to install *finalFile* as the system hosts file.

    Returns False when the user declines; returns None after the move was
    attempted.
    """
    answer = query_yes_no("Do you want to replace your existing hosts file with the newly generated file?")
    if answer != "yes":
        return False
    moveHostsFileIntoPlace(finalFile)
# End Prompt the User
# Exclusion logic
def displayExclusionOptions():
    """Offer each common exclusion, then optionally gather custom domains."""
    for commonDomain in COMMON_EXCLUSIONS:
        question = "Do you want to exclude the domain " + commonDomain + " ?"
        if query_yes_no(question) == "yes":
            excludeDomain(commonDomain)

    # Asked once, after all the common domains have been offered.
    if query_yes_no("Do you want to exclude any other domains?") == "yes":
        gatherCustomExclusions()
2015-10-29 00:33:16 +01:00
def gatherCustomExclusions():
    """Read domains from the user until they have no more to enter.

    Every entry that passes isValidDomainFormat is registered for exclusion.
    The "more domains?" question follows every entry, valid or not.
    """
    wantsMore = True
    while wantsMore:
        # Cross-python Input
        enteredDomain = myInput("Enter the domain you want to exclude (e.g. facebook.com): ")
        if isValidDomainFormat(enteredDomain):
            excludeDomain(enteredDomain)
        wantsMore = promptForMoreCustomExclusions() != False
def excludeDomain(domain):
    """Register *domain* and its subdomains for exclusion from the output.

    The compiled regex is appended to the module-level exclusionRegexs list.
    The domain is escaped so that '.' only matches a literal dot, and the
    pattern is anchored at both ends so that only the domain itself or
    "label.domain" hostnames match. Without this, excluding "t.co" would
    also exclude unrelated hosts such as "microsoft.com".
    """
    pattern = '^' + EXCLUSION_PATTERN + re.escape(domain) + '$'
    # Host names are case-insensitive, so match them that way.
    exclusionRegexs.append(re.compile(pattern, re.IGNORECASE))
def matchesExclusions(strippedRule):
    """Return True if the rule's hostname matches any registered exclusion.

    *strippedRule* must contain at least two whitespace-separated fields;
    the second field is taken as the hostname.
    """
    hostnameField = strippedRule.split()[1]
    return any(regex.search(hostnameField) for regex in exclusionRegexs)
# End Exclusion Logic
# Update Logic
def updateAllSources():
    """Download a fresh copy of every source that declares an update URL.

    Sources without an update URL are skipped. A source whose download
    fails is reported and keeps its existing local data file instead of
    aborting the whole run.
    """
    for source in SOURCES:
        updateURL = getUpdateURLFromFile(source)
        if updateURL is None:
            continue
        print ('Updating source ' + source + ' from ' + updateURL)

        # Cross-python call
        updatedFile = getFileByUrl(updateURL)
        if updatedFile is None:
            # getFileByUrl reports failures by returning None; the data file
            # is left untouched so the previous copy stays usable.
            printFailure('Skipping source ' + source + ': download failed, keeping the local copy.')
            continue
        updatedFile = updatedFile.replace('\r', '')  # get rid of carriage-return symbols

        # This is cross-python code
        with open(os.path.join(DATA_PATH, source, DATA_FILENAMES), 'wb') as dataFile:
            writeData(dataFile, updatedFile)
def getUpdateURLFromFile(source):
    """Return the update URL stored for *source*, or None if there is none.

    The URL is the first line of the source's update file with surrounding
    whitespace removed. When the file does not exist, a warning is printed
    and None is returned.
    """
    pathToUpdateFile = os.path.join(DATA_PATH, source, UPDATE_URL_FILENAME)
    if not os.path.exists(pathToUpdateFile):
        printFailure('Warning: Can\'t find the update file for source ' + source + '\n' +
                     'Make sure that there\'s a file at ' + pathToUpdateFile)
        return None

    # The with-block closes the file even if reading it raises.
    with open(pathToUpdateFile, 'r') as updateFile:
        return updateFile.readline().strip()
# End Update Logic
# File Logic
def createInitialFile():
    """Concatenate the data file of every source into one temporary file.

    Returns the open NamedTemporaryFile. The caller is responsible for
    rewinding it before reading and for closing it.
    """
    mergeFile = tempfile.NamedTemporaryFile()
    for source in SOURCES:
        # Close each source file as soon as it has been copied rather than
        # leaving one open handle per source.
        with open(os.path.join(DATA_PATH, source, DATA_FILENAMES), 'r') as curFile:
            # Done in a cross-python way
            writeData(mergeFile, curFile.read())
    return mergeFile
def removeDupsAndExcl(mergeFile):
    """Filter the merged sources into the project's 'hosts' file.

    Comment and blank lines are copied unchanged. Every other line is
    reduced to "address hostname", dropped if it matches a user exclusion or
    a whitelist entry, normalized, and written once per hostname. The
    module-level numberOfRules counter is incremented for each rule written.

    *mergeFile* is the binary file produced by createInitialFile; it is
    closed here. Returns the open 'hosts' file object, positioned after the
    last rule written.
    """
    global numberOfRules

    if os.path.isfile(WHITELIST_FILE):
        with open(WHITELIST_FILE, "r") as whitelist:
            for entry in whitelist:
                # Drop the trailing newline so an entry also matches rules
                # followed by a comment. Skip blank and comment lines: a
                # blank entry would otherwise match, and drop, every rule.
                entry = entry.strip()
                if entry and not entry.startswith('#') and entry not in EXCLUSIONS:
                    EXCLUSIONS.append(entry)

    # Another mode is required to read and write the file in Python 3
    finalFile = open(os.path.join(BASEDIR_PATH, 'hosts'), 'r+b')
    mergeFile.seek(0)  # reset file pointer

    # "localhost" is pre-seeded so that no source can add a rule for it.
    hostnames = set(["localhost"])
    for rawLine in mergeFile.readlines():
        # Explicit encoding
        line = rawLine.decode("UTF-8")

        # Look at the whole line, not only its first character, so that an
        # indented rule is still normalized and deduplicated, while comments
        # and blank lines (indented or not) are copied unchanged.
        content = line.strip()
        if not content or content.startswith('#'):
            # Cross-python write
            writeData(finalFile, line)
            continue

        strippedRule = stripRule(line)  # strip comments
        if not strippedRule:
            continue
        if matchesExclusions(strippedRule):
            continue

        hostname, normalizedRule = normalizeRule(strippedRule)  # normalize rule
        if not normalizedRule or hostname in hostnames:
            continue
        # Whitelist entries are matched as substrings of the source line.
        if any(exclude in line for exclude in EXCLUSIONS):
            continue

        writeData(finalFile, normalizedRule)
        hostnames.add(hostname)
        numberOfRules += 1

    mergeFile.close()
    return finalFile
def normalizeRule(rule):
    """Rewrite a hosts rule so that it points at TARGET_HOST.

    *rule* is expected to look like "<dotted address> <hostname>[rest]".
    Returns (hostname, normalizedLine), with the hostname lowercased and the
    line ending in a newline. Any text after the hostname is kept as a
    trailing comment. When *rule* does not have that shape, it is printed
    between '==>' and '<==' markers and (None, None) is returned.
    """
    result = re.search(r'^[ \t]*(\d+\.\d+\.\d+\.\d+)\s+([\w\.-]+)(.*)', rule)
    if not result:
        print ('==>%s<==' % rule)
        return None, None

    # The original target address (group 1) is discarded on purpose.
    hostname, suffix = result.group(2, 3)
    hostname = hostname.lower()  # explicitly lowercase hostname

    # Test for emptiness by value; comparing with "is not ''" depends on
    # string interning and is not a reliable emptiness check.
    if suffix:
        # add suffix as comment only, not as a separate host
        return hostname, "%s %s #%s\n" % (TARGET_HOST, hostname, suffix)
    return hostname, "%s %s\n" % (TARGET_HOST, hostname)
def finalizeFile(finalFile):
    """Write the opening header into *finalFile* and close it.

    The file is closed even when writing the header raises, so the handle
    is never left open; the exception still propagates to the caller.
    """
    try:
        writeOpeningHeader(finalFile)
    finally:
        finalFile.close()
# Some sources put comments around their rules; for accuracy we need to strip them.
def stripRule(line):
    """Return the first two whitespace-separated fields of *line*.

    The fields are joined by a single space. A line with fewer than two
    fields yields an empty string.
    """
    fields = line.split()
    return ' '.join(fields[:2]) if len(fields) >= 2 else ''
def writeOpeningHeader(finalFile):
    """Insert the banner and the optional 'myhosts' preamble at the top.

    The current content of *finalFile* is read, the banner and the preamble
    are written from the start of the file, and the saved content is
    written back after them.
    """
    # Save what is already in the file so it can be re-appended.
    finalFile.seek(0)
    existingRules = finalFile.read()
    finalFile.seek(0)  # write at the top

    bannerParts = (
        '# This file is a merged collection of hosts from reputable sources,\n',
        '# with a dash of crowd sourcing via Github\n#\n',
        '# Project home page: https://github.com/StevenBlack/hosts\n#\n',
        '# ===============================================================\n',
        '\n',
        '127.0.0.1 localhost\n',
        '\n',
    )
    for part in bannerParts:
        writeData(finalFile, part)

    # A 'myhosts' file next to the script, if present, is copied in as-is.
    preamble = os.path.join(BASEDIR_PATH, "myhosts")
    if os.path.isfile(preamble):
        with open(preamble, "r") as customRules:
            writeData(finalFile, customRules.read())

    finalFile.write(existingRules)
def updateReadme(numberOfRules):
    """Regenerate the readme from its template.

    Every '@NUM_ENTRIES@' placeholder in the template is replaced with
    *numberOfRules* formatted with thousands separators.
    """
    entryCount = "{:,}".format(numberOfRules)
    # Open the template first so that a missing template raises before the
    # existing readme is truncated; both files are closed on exit.
    with open(README_TEMPLATE) as template:
        with open(README_FILE, "wt") as out:
            for line in template:
                out.write(line.replace('@NUM_ENTRIES@', entryCount))
def moveHostsFileIntoPlace(finalFile):
    """Install *finalFile* as the system hosts file and flush the DNS cache.

    On POSIX systems the file is copied to /etc/hosts through sudo. The DNS
    cache is then flushed by signalling mDNSResponder on Darwin, or by
    restarting nscd when its init script exists. On Windows only manual
    instructions are printed. Failures are reported, not raised.
    """
    if os.name == 'posix':
        print ('Moving the file requires administrative privileges. You might need to enter your password.')
        # subprocess.call returns the exit status; non-zero means failure.
        if subprocess.call(["/usr/bin/sudo", "cp", os.path.abspath(finalFile.name), "/etc/hosts"]):
            printFailure("Moving the file failed.")

        print ('Flushing the DNS Cache to utilize new hosts file...')
        if platform.system() == 'Darwin':
            if subprocess.call(["/usr/bin/sudo", "killall", "-HUP", "mDNSResponder"]):
                printFailure("Flushing the DNS Cache failed.")
        elif os.path.isfile("/etc/rc.d/init.d/nscd"):
            if subprocess.call(["/usr/bin/sudo", "/etc/rc.d/init.d/nscd", "restart"]):
                printFailure("Flushing the DNS Cache failed.")
    elif os.name == 'nt':
        print ('Automatically moving the hosts file in place is not yet supported.')
        # Raw string: the backslashes are path separators, not escapes.
        print (r'Please move the generated file to %SystemRoot%\system32\drivers\etc\hosts')
2015-11-15 22:38:07 +01:00
def removeOldHostsFile():
    """Replace the project's 'hosts' file with a new, empty one.

    Hotfix: merging into an already existing hosts file leads to artefacts
    and duplicates, so the old file is deleted and recreated.
    """
    hostsPath = os.path.join(BASEDIR_PATH, 'hosts')

    def touch():
        # Append mode creates the file if needed and keeps its content.
        open(hostsPath, 'a').close()

    touch()               # create if already removed, so remove won't raise an error
    os.remove(hostsPath)
    touch()               # create new empty hosts file
# End File Logic
# Helper Functions
## {{{ http://code.activestate.com/recipes/577058/ (r2)
def query_yes_no(question, default="yes"):
    """Ask a yes/no question on the console and return "yes" or "no".

    "question" is the text shown to the user.
    "default" is the answer assumed when the user just hits <Enter>: "yes"
        (the default), "no", or None to require an explicit answer.

    Accepts "yes", "ye", "y", "no" and "n" in any letter case and keeps
    asking until one of them is given. Raises ValueError when "default" is
    not one of the allowed values.
    """
    valid = {"yes":"yes", "y":"yes", "ye":"yes",
             "no":"no", "n":"no"}

    # The capital letter in the prompt marks the default answer.
    if default == "yes":
        prompt = " [Y/n] "
    elif default == "no":
        prompt = " [y/N] "
    elif default is None:
        prompt = " [y/n] "
    else:
        raise ValueError("invalid default answer: '%s'" % default)

    while True:
        sys.stdout.write(colorize(question, colors.PROMPT) + prompt)
        # Changed to be cross-python
        answer = myInput().lower()
        if answer == '' and default is not None:
            return default
        if answer in valid:
            return valid[answer]
        printFailure("Please respond with 'yes' or 'no' (or 'y' or 'n').\n")
## end of http://code.activestate.com/recipes/577058/ }}}
def isValidDomainFormat(domain):
    """Return True when *domain* looks like a bare domain name.

    Empty or whitespace-only input is rejected, as is input that starts
    with a "www." style prefix (optionally numbered, e.g. "www2.") or with
    an "http://" or "https://" scheme. A message explaining the problem is
    printed for every rejected value.
    """
    if not domain.strip():
        print ("You didn\'t enter a domain. Try again.")
        return False

    # Raw string keeps \d intact. The scheme test requires "://" so that
    # real domains which merely start with "http" (e.g. httpbin.org) pass.
    if re.match(r"www\d{0,3}[.]|https?://", domain):
        print ("The domain " + domain + " is not valid. Do not include www.domain.com or http(s)://domain.com. Try again.")
        return False
    return True
# Colors
class colors:
    """Terminal escape sequences used to colour console messages."""
    # Appended by colorize() after every coloured text
    ENDC = '\033[0m'
    # Used by printFailure()
    FAIL = '\033[91m'
    # Used by printSuccess()
    SUCCESS = '\033[92m'
    # Used for the question text in query_yes_no()
    PROMPT = '\033[94m'
def colorize(text, color):
    """Return *text* prefixed with *color* and followed by colors.ENDC."""
    resetCode = colors.ENDC
    return color + text + resetCode
def printSuccess(text):
    """Print *text* wrapped in the colors.SUCCESS escape sequence."""
    message = colorize(text, colors.SUCCESS)
    print (message)
def printFailure(text):
    """Print *text* wrapped in the colors.FAIL escape sequence."""
    message = colorize(text, colors.FAIL)
    print (message)
# End Helper Functions
# Start the interactive workflow only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()