hosts/updateHostsFile.py

401 lines
13 KiB
Python
Raw Normal View History

2015-10-26 23:46:48 +01:00
#!/usr/bin/env python
# Script by Ben Limmer
# https://github.com/l1m5
#
# This Python script will combine all the host files you provide
# as sources into one, unique host file to keep your internet browsing happy.
2015-10-26 23:46:48 +01:00
# Making Python 2 compatible with Python 3
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import platform
import re
import string
import subprocess
import sys
import tempfile
2015-11-05 13:38:08 +01:00
import glob
2015-10-26 23:16:55 +01:00
# zip files are not used actually, support deleted
# StringIO is not needed in Python 3
# Python 3 works differently with urlopen
2015-10-26 23:46:48 +01:00
# Supporting urlopen in Python 2 and Python 3
try:
from urllib.parse import urlparse, urlencode
from urllib.request import urlopen, Request
from urllib.error import HTTPError
except ImportError:
from urlparse import urlparse
from urllib import urlencode
from urllib2 import urlopen, Request, HTTPError
# This function handles both Python 2 and Python 3
def getFileByUrl(url):
    """Download ``url`` and return its body decoded as UTF-8 text.

    Works with both the Python 2 and the Python 3 ``urlopen``.

    Args:
        url: Address of the resource to fetch.

    Returns:
        The decoded response body, or None when fetching or decoding
        fails (a message naming the URL is printed in that case).
    """
    try:
        response = urlopen(url)
        try:
            return response.read().decode("UTF-8")
        finally:
            # Release the handle even if read() or decode() raises.
            response.close()
    except Exception:
        # Best effort: one unreachable source must not abort the run, but
        # unlike a bare ``except`` this lets Ctrl-C and SystemExit through.
        print("Problem getting file: ", url)
        return None
2015-10-26 23:46:48 +01:00
2015-10-26 23:16:55 +01:00
# In Python 3 "print" is a function, braces are added everywhere
2015-10-26 23:46:48 +01:00
# Detect the interpreter generation once; the cross-version helpers below
# branch on this flag.
cur_version = sys.version_info
Python3 = cur_version >= (3, 0)
2015-10-29 00:33:16 +01:00
2015-10-26 23:46:48 +01:00
def myInput(msg=""):
    """Show ``msg`` as a prompt and return the line the user typed.

    Dispatches to ``input`` on Python 3 and to ``raw_input`` on Python 2.
    """
    reader = input if Python3 else raw_input
    return reader(msg)
2015-10-26 23:46:48 +01:00
def writeData(f, data):
    """Write ``data`` to the binary file object ``f`` as UTF-8 bytes.

    Args:
        f: File-like object opened in binary mode.
        data: Text to encode as UTF-8, or bytes that are written unchanged.
    """
    # Dispatching on the payload type instead of the interpreter version
    # keeps non-ASCII text working on Python 2 (where ``str(data)`` forced
    # an implicit ASCII encode) and lets already-encoded bytes pass through.
    if not isinstance(data, bytes):
        data = data.encode('UTF-8')
    f.write(data)
2015-10-29 00:33:16 +01:00
2015-11-05 13:38:08 +01:00
def listdir_nohidden(path):
    """Return the entries of ``path`` that match the ``*`` glob pattern.

    The wildcard does not match names starting with a dot, so hidden files
    are left out. Every result is returned joined onto ``path``.
    """
    pattern = os.path.join(path, '*')
    return glob.glob(pattern)
2015-10-26 23:46:48 +01:00
# Project Settings
BASEDIR_PATH = os.path.dirname(os.path.realpath(__file__))
DATA_PATH = os.path.join(BASEDIR_PATH, 'data')
# Name of the hosts list stored inside every source directory
DATA_FILENAMES = 'hosts'
# Per-source file whose first line holds the download URL
UPDATE_URL_FILENAME = 'update.info'
SOURCES = listdir_nohidden(DATA_PATH)
README_TEMPLATE = os.path.join(BASEDIR_PATH, 'readme_template.md')
README_FILE = os.path.join(BASEDIR_PATH, 'readme.md')
# Address every normalized rule is pointed at
TARGET_HOST = '0.0.0.0'
WHITELIST_FILE = os.path.join(BASEDIR_PATH, 'whitelist')

# Exclusions
# Raw string so the regex escapes reach ``re`` untouched instead of being
# parsed as (invalid) string escapes. The pattern matches any number of
# leading subdomain labels; the excluded domain is appended to the end.
EXCLUSION_PATTERN = r'([a-zA-Z\d-]+\.){0,}'
EXCLUSIONS = []

# Common domains to exclude
COMMON_EXCLUSIONS = ['hulu.com']

# Global vars
exclusionRegexs = []  # compiled patterns added by excludeDomain()
numberOfRules = 0  # rules written so far, updated by removeDupsAndExcl()
def main():
    """Run the interactive workflow, from updating sources to installation."""
    promptForUpdate()
    promptForExclusions()

    merged = createInitialFile()
    removeOldHostsFile()
    hostsFile = removeDupsAndExcl(merged)
    finalizeFile(hostsFile)
    updateReadme(numberOfRules)

    ruleCount = "{:,}".format(numberOfRules)
    summary = 'Success! Your new hosts file has been prepared.\nIt contains ' + ruleCount + ' unique entries.'
    printSuccess(summary)

    promptForMove(hostsFile)
# Prompt the User
def promptForUpdate():
    """Ensure a local 'hosts' file exists, then offer to refresh all sources."""
    hostsPath = os.path.join(BASEDIR_PATH, 'hosts')

    # Create hosts file if it doesn't exist
    if not os.path.isfile(hostsPath):
        try:
            open(hostsPath, 'w+').close()
        except (IOError, OSError):
            # Only file-system failures are expected here; anything else
            # (including Ctrl-C) should propagate instead of being hidden.
            printFailure("ERROR: No 'hosts' file in the folder, try creating one manually")

    response = query_yes_no("Do you want to update all data sources?")
    if response == "yes":
        updateAllSources()
    else:
        print('OK, we\'ll stick with what we\'ve got locally.')
def promptForExclusions():
    """Ask whether any domains should be excluded and start the dialog if so."""
    question = ("Do you want to exclude any domains?\n"
                "For example, hulu.com video streaming must be able to access "
                "its tracking and ad servers in order to play video.")
    if query_yes_no(question) != "yes":
        print('OK, we\'ll only exclude domains in the whitelist.')
        return
    displayExclusionOptions()
def promptForMoreCustomExclusions():
    """Return True when the user wants to enter another domain, else False."""
    answer = query_yes_no("Do you have more domains you want to enter?")
    return answer == "yes"
def promptForMove(finalFile):
    """Offer to install the generated hosts file.

    Returns False when the user declines; after a confirmed install nothing
    is returned.
    """
    answer = query_yes_no("Do you want to replace your existing hosts file with the newly generated file?")
    if answer != "yes":
        return False
    moveHostsFileIntoPlace(finalFile)
# End Prompt the User
# Exclusion logic
def displayExclusionOptions():
    """Offer each common exclusion in turn, then offer custom exclusions."""
    for candidate in COMMON_EXCLUSIONS:
        question = "Do you want to exclude the domain " + candidate + " ?"
        if query_yes_no(question) == "yes":
            excludeDomain(candidate)

    if query_yes_no("Do you want to exclude any other domains?") == "yes":
        gatherCustomExclusions()
2015-10-29 00:33:16 +01:00
def gatherCustomExclusions():
    """Keep reading domains to exclude until the user has no more to enter.

    Entries rejected by isValidDomainFormat are not excluded, but the user
    is still asked whether to continue.
    """
    keepAsking = True
    while keepAsking:
        # Cross-python input
        entered = myInput("Enter the domain you want to exclude (e.g. facebook.com): ")
        if isValidDomainFormat(entered):
            excludeDomain(entered)
        keepAsking = promptForMoreCustomExclusions() != False
def excludeDomain(domain):
    """Register ``domain`` (and its subdomains) as excluded from the output.

    The compiled pattern is appended to the global ``exclusionRegexs`` list.
    """
    # Escape the user-supplied text so that it is matched literally; left
    # unescaped, the dots in e.g. "hulu.com" match any character.
    exclusionRegexs.append(re.compile(EXCLUSION_PATTERN + re.escape(domain)))
def matchesExclusions(strippedRule):
    """Tell whether the hostname of ``strippedRule`` hits any exclusion regex.

    ``strippedRule`` must contain at least two whitespace-separated fields;
    the second one is taken as the hostname.
    """
    host = strippedRule.split()[1]
    return any(pattern.search(host) for pattern in exclusionRegexs)
# End Exclusion Logic
# Update Logic
def updateAllSources():
    """Download the latest copy of every source that declares an update URL.

    Sources without an update URL, or whose download fails, are skipped and
    their existing data file is left untouched.
    """
    for source in SOURCES:
        updateURL = getUpdateURLFromFile(source)
        if updateURL is None:
            continue

        print('Updating source ' + source + ' from ' + updateURL)
        # Cross-python call
        updatedFile = getFileByUrl(updateURL)
        if updatedFile is None:
            # getFileByUrl reports the failure itself and yields None;
            # skipping avoids an AttributeError on the replace() below and
            # keeps the previously downloaded data.
            continue
        updatedFile = updatedFile.replace('\r', '')  # get rid of carriage-return symbols

        # Binary mode: writeData emits encoded bytes on Python 2 and 3 alike.
        with open(os.path.join(DATA_PATH, source, DATA_FILENAMES), 'wb') as dataFile:
            writeData(dataFile, updatedFile)
def getUpdateURLFromFile(source):
    """Return the update URL recorded for ``source``.

    The URL is the first line, stripped of surrounding whitespace, of the
    UPDATE_URL_FILENAME file inside the source's directory.

    Returns:
        The URL string, or None (after printing a warning) when the update
        file does not exist.
    """
    pathToUpdateFile = os.path.join(DATA_PATH, source, UPDATE_URL_FILENAME)
    if not os.path.exists(pathToUpdateFile):
        printFailure('Warning: Can\'t find the update file for source ' + source + '\n' +
                     'Make sure that there\'s a file at ' + pathToUpdateFile)
        return None

    # The context manager closes the file even if reading raises.
    with open(pathToUpdateFile, 'r') as updateFile:
        return updateFile.readline().strip()
# End Update Logic
# File Logic
def createInitialFile():
    """Concatenate the data file of every source into one temporary file.

    Returns:
        The open ``tempfile.NamedTemporaryFile`` holding the merged
        contents; closing it is left to the caller.
    """
    mergeFile = tempfile.NamedTemporaryFile()
    for source in SOURCES:
        # Close each source as soon as it has been copied instead of
        # leaking one open handle per source.
        with open(os.path.join(DATA_PATH, source, DATA_FILENAMES), 'r') as curFile:
            # Done in a cross-python way
            writeData(mergeFile, curFile.read())

    return mergeFile
def removeDupsAndExcl(mergeFile):
    """Copy the unique, non-excluded rules of ``mergeFile`` into 'hosts'.

    Lines whose first character is ``#`` or whitespace are copied verbatim.
    Any other line is dropped when it mentions ``::1``, has fewer than two
    fields, matches an interactive exclusion, cannot be normalized, contains
    a whitelist entry, or repeats a hostname that was already written.
    The global ``numberOfRules`` is incremented for every rule written.

    Args:
        mergeFile: Binary file object with the merged sources; it is closed
            before returning.

    Returns:
        The hosts file object, still open in ``r+b`` mode.
    """
    global numberOfRules

    if os.path.isfile(WHITELIST_FILE):
        with open(WHITELIST_FILE, "r") as ins:
            for entry in ins:
                # Drop the line ending: with it attached, an entry only
                # matched at the very end of a rule, and a blank whitelist
                # line ("\n") matched -- and thereby suppressed -- every rule.
                entry = entry.strip()
                if entry and not entry.startswith('#'):
                    EXCLUSIONS.append(entry)

    # Another mode is required to read and write the file in Python 3
    finalFile = open(os.path.join(BASEDIR_PATH, 'hosts'), 'r+b')
    mergeFile.seek(0)  # reset file pointer

    hostnames = set()
    hostnames.add("localhost")
    for line in mergeFile.readlines():
        # Explicit encoding
        line = line.decode("UTF-8")

        # Comments, blank lines and any line that begins with whitespace
        # are passed through untouched (only the first character is tested).
        if line[0] == '#' or re.match(r'^\s*$', line[0]):
            # Cross-python write
            writeData(finalFile, line)
            continue
        if '::1' in line:
            continue

        strippedRule = stripRule(line)  # strip comments
        if len(strippedRule) == 0:
            continue
        if matchesExclusions(strippedRule):
            continue

        hostname, normalizedRule = normalizeRule(strippedRule)  # normalize rule
        whitelisted = any(exclude in line for exclude in EXCLUSIONS)

        if normalizedRule and (hostname not in hostnames) and not whitelisted:
            writeData(finalFile, normalizedRule)
            hostnames.add(hostname)
            numberOfRules += 1

    mergeFile.close()

    return finalFile
def normalizeRule(rule):
    """Rewrite a hosts rule so that it points at TARGET_HOST.

    Args:
        rule: Text of the form ``<IPv4 address> <hostname>[trailing text]``,
            optionally preceded by spaces or tabs.

    Returns:
        A ``(hostname, normalizedRule)`` tuple with the hostname lowercased
        and the rule terminated by a newline, or ``(None, None)`` when
        ``rule`` does not have the expected form (the rule is then echoed).
    """
    result = re.search(r'^[ \t]*(\d+\.\d+\.\d+\.\d+)\s+([\w\.-]+)(.*)', rule)
    if result:
        # The original target address (group 1) is replaced by TARGET_HOST.
        hostname, suffix = result.group(2, 3)
        hostname = hostname.lower()  # explicitly lowercase hostname
        # Test the value, not the identity: ``suffix is not ''`` depended on
        # string interning and triggers a SyntaxWarning on Python 3.8+.
        if suffix:
            # add suffix as comment only, not as a separate host
            return hostname, "%s %s #%s\n" % (TARGET_HOST, hostname, suffix)
        return hostname, "%s %s\n" % (TARGET_HOST, hostname)

    print('==>%s<==' % rule)
    return None, None
def finalizeFile(finalFile):
    """Prepend the header to ``finalFile`` and close it."""
    try:
        writeOpeningHeader(finalFile)
    finally:
        # Close the handle even when writing the header fails.
        finalFile.close()
# Some sources put comments around their rules; for accuracy only the
# address and the hostname are kept.
def stripRule(line):
    """Return the first two whitespace-separated fields of ``line``.

    The fields are joined by a single space. An empty string is returned
    when ``line`` has fewer than two fields.
    """
    fields = line.split()
    if len(fields) >= 2:
        return ' '.join(fields[:2])
    # just return blank
    return ''
def writeOpeningHeader(finalFile):
    """Put the banner, localhost entries and 'myhosts' preamble at the top.

    The current contents of ``finalFile`` are read into memory, the header
    is written from offset 0, and the saved contents are written back
    after it. The 'myhosts' file in BASEDIR_PATH is included only when it
    exists.
    """
    finalFile.seek(0)  # reset file pointer
    savedRules = finalFile.read()  # save content
    finalFile.seek(0)  # write at the top

    headerChunks = (
        '# This file is a merged collection of hosts from reputable sources,\n',
        '# with a dash of crowd sourcing via Github\n#\n',
        '# Project home page: https://github.com/StevenBlack/hosts\n#\n',
        '# ===============================================================\n',
        '\n',
        '127.0.0.1 localhost\n',
        '::1 localhost\n',
        '\n',
    )
    for chunk in headerChunks:
        writeData(finalFile, chunk)

    preamble = os.path.join(BASEDIR_PATH, "myhosts")
    if os.path.isfile(preamble):
        with open(preamble, "r") as f:
            writeData(finalFile, f.read())

    finalFile.write(savedRules)
def updateReadme(numberOfRules):
    """Render README_FILE from README_TEMPLATE.

    Every ``@NUM_ENTRIES@`` placeholder is replaced by ``numberOfRules``
    formatted with thousands separators.
    """
    ruleCount = "{:,}".format(numberOfRules)
    # Open the template first so that a missing template cannot leave a
    # truncated readme behind; both handles are closed on exit.
    with open(README_TEMPLATE) as template:
        with open(README_FILE, "wt") as out:
            for line in template:
                out.write(line.replace('@NUM_ENTRIES@', ruleCount))
def moveHostsFileIntoPlace(finalFile):
    """Install the generated hosts file system-wide and flush the DNS cache.

    On POSIX systems the file is copied to /etc/hosts through sudo; then
    mDNSResponder is signalled on macOS, or nscd is restarted when its init
    script exists. On Windows only manual instructions are printed.

    Args:
        finalFile: File object of the generated hosts file; only its
            ``name`` attribute is used.
    """
    if os.name == 'posix':
        print('Moving the file requires administrative privileges. You might need to enter your password.')
        # subprocess.call returns the exit status, so non-zero means failure.
        if subprocess.call(["/usr/bin/sudo", "cp", os.path.abspath(finalFile.name), "/etc/hosts"]):
            printFailure("Moving the file failed.")

        print('Flushing the DNS Cache to utilize new hosts file...')
        if platform.system() == 'Darwin':
            if subprocess.call(["/usr/bin/sudo", "killall", "-HUP", "mDNSResponder"]):
                printFailure("Flushing the DNS Cache failed.")
        else:
            if os.path.isfile("/etc/rc.d/init.d/nscd"):
                if subprocess.call(["/usr/bin/sudo", "/etc/rc.d/init.d/nscd", "restart"]):
                    printFailure("Flushing the DNS Cache failed.")
    elif os.name == 'nt':
        print('Automatically moving the hosts file in place is not yet supported.')
        # Raw string: the backslashes of the Windows path are literal, not
        # (invalid) escape sequences.
        print(r'Please move the generated file to %SystemRoot%\system32\drivers\etc\hosts')
2015-11-15 22:38:07 +01:00
def removeOldHostsFile():
    """Replace the 'hosts' file in BASEDIR_PATH with a new, empty one.

    Hotfix: merging with an already existing hosts file leads to artefacts
    and duplicates.
    """
    oldFilePath = os.path.join(BASEDIR_PATH, 'hosts')

    # Make sure the file exists so that the removal below won't raise.
    with open(oldFilePath, 'a'):
        pass
    os.remove(oldFilePath)

    # Create the new, empty hosts file.
    with open(oldFilePath, 'a'):
        pass
# End File Logic
# Helper Functions
## {{{ http://code.activestate.com/recipes/577058/ (r2)
def query_yes_no(question, default="yes"):
    """Ask a yes/no question on the terminal and return the answer.

    "question" is the text shown to the user.
    "default" is the answer assumed when the user just hits <Enter>:
        "yes" (the default), "no", or None to insist on an explicit answer.
        Any other value raises ValueError.

    The return value is always either "yes" or "no".
    """
    answers = {"yes": "yes", "y": "yes", "ye": "yes",
               "no": "no", "n": "no"}

    # Pick the hint that capitalizes the default choice.
    for candidate, hint in ((None, " [y/n] "), ("yes", " [Y/n] "), ("no", " [y/N] ")):
        if default == candidate:
            prompt = hint
            break
    else:
        raise ValueError("invalid default answer: '%s'" % default)

    while True:
        sys.stdout.write(colorize(question, colors.PROMPT) + prompt)
        # Changed to be cross-python
        choice = myInput().lower()
        if choice == '' and default is not None:
            return default
        if choice in answers:
            return answers[choice]
        printFailure("Please respond with 'yes' or 'no' "\
                     "(or 'y' or 'n').\n")
## end of http://code.activestate.com/recipes/577058/ }}}
def isValidDomainFormat(domain):
    """Check that ``domain`` is a bare domain name.

    Empty input and input starting with ``www.`` (optionally numbered, as
    in ``www2.``) or with an ``http:`` / ``https:`` scheme is rejected with
    an explanatory message.

    Returns:
        True when the domain is acceptable, False otherwise.
    """
    if domain == '':
        print("You didn\'t enter a domain. Try again.")
        return False

    # Raw string keeps ``\d`` a regex escape. The scheme alternative needs
    # the colon so that real domains such as "httpbin.org" are accepted.
    domainRegex = re.compile(r"www\d{0,3}[.]|https?:")
    if domainRegex.match(domain):
        print("The domain " + domain + " is not valid. Do not include www.domain.com or http(s)://domain.com. Try again.")
        return False
    return True
# Colors
class colors:
    """Terminal escape sequences used to colour the script's messages."""
    # Reset sequence appended after every coloured text
    ENDC = '\033[0m'
    # One sequence per kind of message
    PROMPT = '\033[94m'
    SUCCESS = '\033[92m'
    FAIL = '\033[91m'
def colorize(text, color):
    """Return ``text`` prefixed with ``color`` and followed by colors.ENDC."""
    painted = color + text
    return painted + colors.ENDC
def printSuccess(text):
    """Print ``text`` wrapped in the SUCCESS colour."""
    message = colorize(text, colors.SUCCESS)
    print(message)
def printFailure(text):
    """Print ``text`` wrapped in the FAIL colour."""
    message = colorize(text, colors.FAIL)
    print(message)
# End Helper Functions
# Start the interactive workflow only when the file is executed as a script.
if "__main__" == __name__:
    main()