2015-10-26 23:46:48 +01:00
#!/usr/bin/env python
2013-02-10 03:55:04 +01:00
# Script by Ben Limmer
# https://github.com/l1m5
#
2015-11-21 04:10:49 +01:00
# This Python script will combine all the host files you provide
2013-02-10 03:55:04 +01:00
# as sources into one, unique host file to keep you internet browsing happy.
2016-02-08 02:09:42 +01:00
# pylint: disable=invalid-name
# pylint: disable=bad-whitespace
2017-05-15 04:30:36 +02:00
from __future__ import ( absolute_import , division ,
print_function , unicode_literals )
from glob import glob
2015-10-26 23:46:48 +01:00
2013-02-10 03:55:04 +01:00
import os
2013-07-13 23:57:11 +02:00
import platform
2013-02-17 21:51:49 +01:00
import re
2016-02-15 00:15:22 +01:00
import shutil
2013-07-13 23:57:11 +02:00
import subprocess
2013-02-10 03:55:04 +01:00
import sys
import tempfile
2016-02-15 00:15:22 +01:00
import time
2016-12-19 05:17:01 +01:00
import fnmatch
2016-02-11 04:35:09 +01:00
import argparse
2016-03-18 21:10:36 +01:00
import socket
2016-03-23 05:36:01 +01:00
import json
2016-10-17 05:07:06 +02:00
import zipfile
2016-03-24 04:36:47 +01:00
2017-05-15 04:30:36 +02:00
# Detecting Python 3 for version-dependent implementations.
PY3 = sys.version_info >= (3, 0)

if PY3:
    from urllib.request import urlopen

    # Alias so the rest of the script can call raw_input() on either version.
    raw_input = input
else:  # Python 2
    from urllib2 import urlopen
2016-05-02 10:59:54 +02:00
2016-03-24 04:36:47 +01:00
2017-05-15 17:51:17 +02:00
def get_file_by_url(url):
    """Fetch *url* and return its body decoded as UTF-8.

    Returns None (implicitly) when the fetch or decode fails; the
    failure is reported on stdout rather than raised, matching the
    original best-effort contract.
    """
    try:
        f = urlopen(url)
        return f.read().decode("UTF-8")
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; any fetch or decode error is reported instead.
        print("Problem getting file: ", url)
2015-10-26 23:46:48 +01:00
2013-02-10 03:55:04 +01:00
2017-05-15 17:51:17 +02:00
def write_data(f, data):
    """Write the text *data* to the binary file object *f* as UTF-8.

    `data.encode("UTF-8")` behaves identically to the previous
    per-version branches (`bytes(data, "UTF-8")` on Python 3,
    `str(data).encode("UTF-8")` on Python 2) for the text strings this
    script passes, and removes the dependency on the module-level PY3
    flag.
    """
    f.write(data.encode("UTF-8"))
2015-10-29 00:33:16 +01:00
2017-05-15 17:51:17 +02:00
def list_dir_no_hidden(path):
    """Return the entries directly under *path*, excluding hidden files.

    glob's "*" pattern never matches names starting with a dot, so
    dotfiles are skipped for free.
    """
    pattern = os.path.join(path, "*")
    return glob(pattern)
2015-10-26 23:46:48 +01:00
2017-05-15 17:51:17 +02:00
2013-02-17 21:51:49 +01:00
# Project Settings
BASEDIR_PATH = os.path.dirname(os.path.realpath(__file__))

# Default runtime settings; main() overlays these with the parsed
# command-line options to build the global `settings` dict.
defaults = {
    "numberofrules": 0,
    "datapath": os.path.join(BASEDIR_PATH, "data"),
    "freshen": True,
    "replace": False,
    "backup": False,
    "skipstatichosts": False,
    "keepdomaincomments": False,
    "extensionspath": os.path.join(BASEDIR_PATH, "extensions"),
    "extensions": [],
    "outputsubfolder": "",
    "hostfilename": "hosts",
    "targetip": "0.0.0.0",
    "ziphosts": False,
    "sourcedatafilename": "update.json",
    "sourcesdata": [],
    "readmefilename": "readme.md",
    "readmetemplate": os.path.join(BASEDIR_PATH, "readme_template.md"),
    "readmedata": {},
    "readmedatafilename": os.path.join(BASEDIR_PATH, "readmeData.json"),
    # Prefix matching any chain of subdomain labels before an excluded domain.
    "exclusionpattern": r"([a-zA-Z\d-]+\.){0,}",
    "exclusionregexs": [],
    "exclusions": [],
    "commonexclusions": ["hulu.com"],
    "blacklistfile": os.path.join(BASEDIR_PATH, "blacklist"),
    "whitelistfile": os.path.join(BASEDIR_PATH, "whitelist"),
}
2016-03-24 06:10:13 +01:00
2016-02-15 23:06:38 +01:00
2017-05-15 17:51:17 +02:00
def main():
    """Parse options, build the unified hosts file, and optionally
    install it in place of the system hosts file."""
    arg_parser = argparse.ArgumentParser(
        description="Creates a unified hosts file from "
                    "hosts stored in data subfolders.")
    arg_parser.add_argument("--auto", "-a", dest="auto", default=False,
                            action="store_true",
                            help="Run without prompting.")
    arg_parser.add_argument("--backup", "-b", dest="backup", default=False,
                            action="store_true",
                            help="Backup the hosts files before they "
                                 "are overridden.")
    arg_parser.add_argument("--extensions", "-e", dest="extensions",
                            default=[], nargs="*",
                            help="Host extensions to include in the "
                                 "final hosts file.")
    arg_parser.add_argument("--ip", "-i", dest="targetip",
                            default="0.0.0.0",
                            help="Target IP address. Default is 0.0.0.0.")
    arg_parser.add_argument("--keepdomaincomments", "-k",
                            dest="keepdomaincomments", default=False,
                            help="Keep domain line comments.")
    arg_parser.add_argument("--zip", "-z", dest="ziphosts", default=False,
                            action="store_true",
                            help="Additionally create a zip archive of "
                                 "the hosts file.")
    arg_parser.add_argument("--noupdate", "-n", dest="noupdate",
                            default=False, action="store_true",
                            help="Don't update from host data sources.")
    arg_parser.add_argument("--skipstatichosts", "-s",
                            dest="skipstatichosts", default=False,
                            action="store_true",
                            help="Skip static localhost entries in the "
                                 "final hosts file.")
    arg_parser.add_argument("--output", "-o", dest="outputsubfolder",
                            default="",
                            help="Output subfolder for generated hosts "
                                 "file.")
    arg_parser.add_argument("--replace", "-r", dest="replace",
                            default=False, action="store_true",
                            help="Replace your active hosts file with "
                                 "this new hosts file.")
    arg_parser.add_argument("--flush-dns-cache", "-f", dest="flushdnscache",
                            default=False, action="store_true",
                            help="Attempt to flush DNS cache after "
                                 "replacing the hosts file.")

    global settings

    options = vars(arg_parser.parse_args())

    options["outputpath"] = os.path.join(BASEDIR_PATH,
                                         options["outputsubfolder"])
    options["freshen"] = not options["noupdate"]

    # Defaults first, then CLI options override them.
    settings = {}
    settings.update(defaults)
    settings.update(options)

    settings["sources"] = list_dir_no_hidden(settings["datapath"])
    settings["extensionsources"] = list_dir_no_hidden(
        settings["extensionspath"])

    # All our extensions folders...
    settings["extensions"] = [os.path.basename(item) for item in
                              list_dir_no_hidden(settings["extensionspath"])]
    # ... intersected with the extensions passed-in as arguments, then sorted.
    settings["extensions"] = sorted(list(
        set(options["extensions"]).intersection(settings["extensions"])))

    with open(settings["readmedatafilename"], "r") as readme_file:
        settings["readmedata"] = json.load(readme_file)

    prompt_for_update()
    prompt_for_exclusions()
    merge_file = create_initial_file()
    remove_old_hosts_file()
    final_file = remove_dups_and_excl(merge_file)
    finalize_file(final_file)

    if settings["ziphosts"]:
        zip_file = zipfile.ZipFile(os.path.join(settings["outputsubfolder"],
                                                "hosts.zip"), mode='w')
        zip_file.write(os.path.join(settings["outputsubfolder"], "hosts"),
                       compress_type=zipfile.ZIP_DEFLATED, arcname='hosts')
        zip_file.close()

    update_readme_data()
    print_success("Success! The hosts file has been saved in folder " +
                  settings["outputsubfolder"] + "\nIt contains " +
                  "{:,}".format(settings["numberofrules"]) +
                  " unique entries.")

    prompt_for_move(final_file)
2013-02-10 03:55:04 +01:00
2017-05-14 06:09:36 +02:00
2013-02-17 21:51:49 +01:00
# Prompt the User
2017-05-15 17:51:17 +02:00
def prompt_for_update():
    """Ensure a hosts file exists, then optionally refresh all sources.

    Skips the source refresh entirely when the --noupdate flag was
    given (settings["freshen"] is False); otherwise updates without
    asking in --auto mode, or asks the user first.
    """
    # Create hosts file if it doesn't exist.
    hosts_path = os.path.join(BASEDIR_PATH, "hosts")
    if not os.path.isfile(hosts_path):
        try:
            open(hosts_path, "w+").close()
        except OSError:
            # Narrowed from a bare `except:` — file-creation failures
            # are OSError; the script still continues best-effort.
            print_failure("ERROR: No 'hosts' file in the folder, "
                          "try creating one manually")

    if not settings["freshen"]:
        return

    prompt = "Do you want to update all data sources?"
    if settings["auto"] or query_yes_no(prompt):
        update_all_sources()
    elif not settings["auto"]:
        print("OK, we'll stick with what we've got locally.")
2013-02-10 03:55:04 +01:00
2017-05-15 17:51:17 +02:00
def prompt_for_exclusions():
    """Interactively offer domain exclusions (no-op in --auto mode)."""
    prompt = ("Do you want to exclude any domains?\n"
              "For example, hulu.com video streaming must be able to access "
              "its tracking and ad servers in order to play video.")

    # Automatic runs never prompt and never exclude interactively.
    if settings["auto"]:
        return

    if query_yes_no(prompt):
        display_exclusion_options()
    else:
        print("OK, we'll only exclude domains in the whitelist.")
2013-02-17 22:39:40 +01:00
2017-05-14 06:09:36 +02:00
2017-05-15 17:51:17 +02:00
def prompt_for_flush_dns_cache():
    """Flush the DNS cache when requested, or ask the user whether to.

    NOTE(review): with --flush-dns-cache set in interactive mode this
    can flush twice (once for the flag, once after the prompt) — this
    mirrors the original control flow; confirm if intended.
    """
    if settings["flushdnscache"]:
        flush_dns_cache()

    if not settings["auto"] and query_yes_no("Attempt to flush the DNS cache?"):
        flush_dns_cache()
2016-06-12 14:04:38 +02:00
2017-05-15 17:51:17 +02:00
def prompt_for_move(final_file):
    """Decide whether to install *final_file* as the system hosts file.

    --replace forces the move (unless static hosts were skipped);
    --auto or --skipstatichosts forces no move; otherwise ask.
    Returns False when no move is performed, None otherwise.
    """
    if settings["replace"] and not settings["skipstatichosts"]:
        should_move = True
    elif settings["auto"] or settings["skipstatichosts"]:
        should_move = False
    else:
        question = ("Do you want to replace your existing hosts file " +
                    "with the newly generated file?")
        should_move = query_yes_no(question)

    if not should_move:
        return False

    move_hosts_file_into_place(final_file)
    prompt_for_flush_dns_cache()
2013-02-17 21:51:49 +01:00
# End Prompt the User
2017-05-14 06:09:36 +02:00
2013-02-17 21:51:49 +01:00
# Exclusion logic
2017-05-15 17:51:17 +02:00
def display_exclusion_options():
    """Offer each common exclusion, then any custom ones the user wants."""
    for common_domain in settings["commonexclusions"]:
        question = ("Do you want to exclude the domain " +
                    common_domain + " ?")
        if query_yes_no(question):
            exclude_domain(common_domain)

    if query_yes_no("Do you want to exclude any other domains?"):
        gather_custom_exclusions()
2015-10-29 00:33:16 +01:00
2017-05-14 20:19:57 +02:00
def gather_custom_exclusions():
    """Gather custom exclusions from the user."""
    # Loop until the user says they have no more domains to exclude.
    while True:
        entered = raw_input("Enter the domain you want "
                            "to exclude (e.g. facebook.com): ")
        if is_valid_domain_format(entered):
            exclude_domain(entered)

        if not query_yes_no("Do you have more domains you want to enter?"):
            return
2013-02-17 21:51:49 +01:00
2017-05-14 20:19:57 +02:00
2017-05-15 17:51:17 +02:00
def exclude_domain(domain):
    """Compile an exclusion regex for *domain* (and its subdomains)
    and register it in the global settings."""
    pattern = settings["exclusionpattern"] + domain
    settings["exclusionregexs"].append(re.compile(pattern))
2013-02-17 21:51:49 +01:00
2017-05-15 17:51:17 +02:00
def matches_exclusions(stripped_rule):
    """Return True when the hostname in *stripped_rule* ("IP hostname")
    matches any registered exclusion regex."""
    hostname = stripped_rule.split()[1]
    return any(exclusion.search(hostname)
               for exclusion in settings["exclusionregexs"])
2013-02-17 21:51:49 +01:00
# End Exclusion Logic
2017-05-15 17:51:17 +02:00
2013-02-17 21:51:49 +01:00
# Update Logic
2017-05-15 17:51:17 +02:00
def update_all_sources():
    """Re-download every source's hosts file from its update.json URL.

    Sources that fail to download are skipped rather than aborting the
    whole run.
    """
    # Update all hosts files regardless of folder depth.
    all_sources = recursive_glob("*", settings["sourcedatafilename"])

    for source in all_sources:
        # `with` replaces the manual open/close so the handle is
        # released even if json.load raises.
        with open(source, "r") as update_file:
            update_data = json.load(update_file)
        update_url = update_data["url"]

        print("Updating source " + os.path.dirname(
            source) + " from " + update_url)

        updated_file = get_file_by_url(update_url)

        try:
            # get rid of carriage-return symbols
            updated_file = updated_file.replace("\r", "")

            with open(os.path.join(BASEDIR_PATH, os.path.dirname(source),
                                   settings["hostfilename"]),
                      "wb") as hosts_file:
                write_data(hosts_file, updated_file)
        except Exception:
            # get_file_by_url returns None on failure, making .replace
            # raise AttributeError; narrowed from a bare `except:`.
            print("Skipping.")
2016-03-06 02:28:32 +01:00
# End Update Logic
2017-05-15 17:51:17 +02:00
2013-02-17 21:51:49 +01:00
# File Logic
2017-05-15 17:51:17 +02:00
def create_initial_file():
    """Concatenate all source and extension hosts files (plus the
    blacklist, when present) into one temporary file and return it.

    Also collects each source's update.json payload into
    settings["sourcesdata"] for the readme.
    """
    merge_file = tempfile.NamedTemporaryFile()

    # spin the sources for the base file
    for source in recursive_glob(settings["datapath"],
                                 settings["hostfilename"]):
        with open(source, "r") as curFile:
            write_data(merge_file, curFile.read())

    # Context managers replace the manual open/close pairs so handles
    # are released even when json.load raises.
    for source in recursive_glob(settings["datapath"],
                                 settings["sourcedatafilename"]):
        with open(source, "r") as update_file:
            settings["sourcesdata"].append(json.load(update_file))

    # spin the sources for extensions to the base file
    for source in settings["extensions"]:
        for filename in recursive_glob(os.path.join(
                settings["extensionspath"], source),
                settings["hostfilename"]):
            with open(filename, "r") as curFile:
                write_data(merge_file, curFile.read())

        for update_file_path in recursive_glob(os.path.join(
                settings["extensionspath"], source),
                settings["sourcedatafilename"]):
            with open(update_file_path, "r") as update_file:
                settings["sourcesdata"].append(json.load(update_file))

    if os.path.isfile(settings["blacklistfile"]):
        with open(settings["blacklistfile"], "r") as curFile:
            write_data(merge_file, curFile.read())

    return merge_file
2013-02-10 03:55:04 +01:00
2017-05-15 17:51:17 +02:00
def remove_dups_and_excl(merge_file):
    """Filter *merge_file* into the final hosts file: drop duplicates,
    whitelisted domains, and interactive exclusions; return the open
    output file object."""
    number_of_rules = settings["numberofrules"]

    # Whitelist entries become exclusion substrings.
    if os.path.isfile(settings["whitelistfile"]):
        with open(settings["whitelistfile"], "r") as ins:
            for line in ins:
                line = line.strip(" \t\n\r")
                if line and not line.startswith("#"):
                    settings["exclusions"].append(line)

    if not os.path.exists(settings["outputpath"]):
        os.makedirs(settings["outputpath"])

    # Another mode is required to read and write the file in Python 3.
    final_file = open(os.path.join(settings["outputpath"], "hosts"),
                      "w+b" if PY3 else "w+")

    merge_file.seek(0)  # reset file pointer

    # Hostnames already written; seeded with the static local names.
    hostnames = {"localhost", "localhost.localdomain",
                 "local", "broadcasthost"}
    exclusions = settings["exclusions"]

    for line in merge_file.readlines():
        keep = True

        # Explicit encoding
        line = line.decode("UTF-8")

        # replace tabs with space
        # NOTE(review): str.replace is literal, so "\t+" only matches a
        # tab followed by '+'; kept as-is to preserve behavior.
        line = line.replace("\t+", " ")

        # see gh-271: trim trailing whitespace, periods
        line = line.rstrip(' .') + "\n"

        # Comments and blank lines pass straight through.
        if line[0] == "#" or re.match(r'^\s*$', line[0]):
            write_data(final_file, line)
            continue
        if "::1" in line:
            continue

        stripped_rule = strip_rule(line)  # strip comments
        if not stripped_rule or matches_exclusions(stripped_rule):
            continue

        # Normalize rule
        hostname, normalized_rule = normalize_rule(stripped_rule)

        for excluded in exclusions:
            if excluded in line:
                keep = False
                break

        if normalized_rule and hostname not in hostnames and keep:
            write_data(final_file, normalized_rule)
            hostnames.add(hostname)
            number_of_rules += 1

    settings["numberofrules"] = number_of_rules
    merge_file.close()

    return final_file
2013-02-10 03:55:04 +01:00
2017-05-15 17:51:17 +02:00
def normalize_rule(rule):
    """Rewrite *rule* as "<targetip> <hostname>" (optionally keeping the
    trailing comment) and return (hostname, normalized_rule).

    Returns (None, None) when the rule does not look like
    "IP hostname ...".
    """
    match = re.search(r'^[ \t]*(\d+\.\d+\.\d+\.\d+)\s+([\w\.-]+)(.*)', rule)
    if not match:
        print("==>%s<==" % rule)
        return None, None

    hostname, suffix = match.group(2, 3)
    # Explicitly lowercase and trim the hostname.
    hostname = hostname.lower().strip()
    target_ip = settings["targetip"]

    if suffix and settings["keepdomaincomments"]:
        # add suffix as comment only, not as a separate host
        return hostname, "%s %s #%s\n" % (target_ip, hostname, suffix)

    return hostname, "%s %s\n" % (target_ip, hostname)
2014-05-16 14:13:11 +02:00
2013-02-10 03:55:04 +01:00
2017-05-15 17:51:17 +02:00
def finalize_file(final_file):
    """Prepend the informational header to *final_file* and close it."""
    write_opening_header(final_file)
    final_file.close()
# Some sources put comments around their rules; for accuracy we strip
# each rule down to its first two fields (stand-alone comment lines are
# preserved elsewhere in the output hosts file).
def strip_rule(line):
    """Return "IP hostname" — the first two whitespace-separated fields
    of *line* — or "" when the line has fewer than two fields."""
    fields = line.split()
    if len(fields) < 2:
        # just return blank
        return ""
    return fields[0] + " " + fields[1]
def write_opening_header(final_file):
    """Insert the informational header (and optional static host
    entries / user preamble) at the top of *final_file* in place."""
    final_file.seek(0)                   # reset file pointer
    file_contents = final_file.read()    # save content
    final_file.seek(0)                   # write at the top

    header = [
        "# This hosts file is a merged collection "
        "of hosts from reputable sources,\n",
        "# with a dash of crowd sourcing via Github\n#\n",
        "# Date: " + time.strftime("%B %d %Y", time.gmtime()) + "\n",
    ]
    if settings["extensions"]:
        header.append("# Extensions added to this file: " +
                      ", ".join(settings["extensions"]) + "\n")
    header += [
        "# Number of unique domains: " +
        "{:,}\n#\n".format(settings["numberofrules"]),
        "# Fetch the latest version of this file: "
        "https://raw.githubusercontent.com/"
        "StevenBlack/hosts/master/" +
        os.path.join(settings["outputsubfolder"], "") + "hosts\n",
        "# Project home page: https://github.com/"
        "StevenBlack/hosts\n#\n",
        "# ==============================="
        "================================\n",
        "\n",
    ]
    for text in header:
        write_data(final_file, text)

    if not settings["skipstatichosts"]:
        static_entries = [
            "127.0.0.1 localhost\n",
            "127.0.0.1 localhost.localdomain\n",
            "127.0.0.1 local\n",
            "255.255.255.255 broadcasthost\n",
            "::1 localhost\n",
            "fe80::1%lo0 localhost\n",
            "0.0.0.0 0.0.0.0\n",
        ]
        for text in static_entries:
            write_data(final_file, text)
        if platform.system() == "Linux":
            write_data(final_file, "127.0.1.1 " + socket.gethostname() + "\n")
            write_data(final_file, "127.0.0.53 " + socket.gethostname() + "\n")
        write_data(final_file, "\n")

    # A user-supplied "myhosts" file, if present, is included verbatim.
    preamble = os.path.join(BASEDIR_PATH, "myhosts")
    if os.path.isfile(preamble):
        with open(preamble, "r") as user_hosts:
            write_data(final_file, user_hosts.read())

    # Re-append the previously generated content after the header.
    final_file.write(file_contents)
2016-02-08 01:18:16 +01:00
2017-05-15 17:51:17 +02:00
def update_readme_data():
    """Record this run's stats under its extensions key in the
    readme-data JSON file."""
    # Key is "base" for a plain build, else the joined extension names.
    if settings["extensions"]:
        extensions_key = "-".join(settings["extensions"])
    else:
        extensions_key = "base"

    settings["readmedata"][extensions_key] = {
        "location": os.path.join(settings["outputsubfolder"], ""),
        "entries": settings["numberofrules"],
        "sourcesdata": settings["sourcesdata"],
    }

    with open(settings["readmedatafilename"], "w") as f:
        json.dump(settings["readmedata"], f)
2016-03-23 05:36:01 +01:00
2016-06-12 14:04:38 +02:00
2017-05-14 05:38:59 +02:00
def move_hosts_file_into_place(final_file):
    """
    Move the newly-created hosts file into its correct location on the OS.

    For UNIX systems, the hosts file is "etc/hosts." On Windows, it's
    "C:\\Windows\\System32\\drivers\\etc\\hosts."

    For this move to work, you must have administrator privileges to do this.
    On UNIX systems, this means having "sudo" access, and on Windows, it
    means being able to run command prompt in administrator mode.

    Parameters
    ----------
    final_file : file object
        The newly-created hosts file to move.
    """
    filename = os.path.abspath(final_file.name)

    if os.name == "posix":
        print("Moving the file requires administrative privileges. "
              "You might need to enter your password.")
        # subprocess.call returns a non-zero exit status on failure.
        if subprocess.call(["/usr/bin/sudo", "cp", filename, "/etc/hosts"]):
            print_failure("Moving the file failed.")
    elif os.name == "nt":
        print("Automatically moving the hosts file "
              "in place is not yet supported.")
        print("Please move the generated file to "
              "%SystemRoot%\system32\drivers\etc\hosts")
2016-06-12 14:04:38 +02:00
2017-05-14 05:38:59 +02:00
def flush_dns_cache():
    """
    Flush the DNS cache.

    macOS restarts mDNSResponder; Windows is unsupported (manual
    command printed); other systems try each known Linux DNS-caching
    service in turn.
    """
    print("Flushing the DNS cache to utilize new hosts file...")
    print("Flushing the DNS cache requires administrative privileges. " +
          "You might need to enter your password.")

    dns_cache_found = False

    if platform.system() == "Darwin":
        if subprocess.call(["/usr/bin/sudo", "killall",
                            "-HUP", "mDNSResponder"]):
            print_failure("Flushing the DNS cache failed.")
    elif os.name == "nt":
        print("Automatically flushing the DNS cache is not yet supported.")
        print("Please copy and paste the command 'ipconfig /flushdns' in "
              "administrator command prompt after running this script.")
    else:
        # The five copy-pasted service blocks are collapsed into one
        # data-driven loop: (service file, restart command, success message).
        dns_services = [
            ("/etc/rc.d/init.d/nscd",
             ["/usr/bin/sudo", "/etc/rc.d/init.d/nscd", "restart"],
             "Flushing DNS by restarting nscd succeeded"),
            ("/usr/lib/systemd/system/NetworkManager.service",
             ["/usr/bin/sudo", "/usr/bin/systemctl", "restart",
              "NetworkManager.service"],
             "Flushing DNS by restarting NetworkManager succeeded"),
            ("/usr/lib/systemd/system/wicd.service",
             ["/usr/bin/sudo", "/usr/bin/systemctl", "restart",
              "wicd.service"],
             "Flushing DNS by restarting wicd succeeded"),
            ("/usr/lib/systemd/system/dnsmasq.service",
             ["/usr/bin/sudo", "/usr/bin/systemctl", "restart",
              "dnsmasq.service"],
             "Flushing DNS by restarting dnsmasq succeeded"),
            ("/usr/lib/systemd/system/networking.service",
             ["/usr/bin/sudo", "/usr/bin/systemctl", "restart",
              "networking.service"],
             "Flushing DNS by restarting networking.service succeeded"),
        ]
        for service_file, restart_cmd, success_msg in dns_services:
            if os.path.isfile(service_file):
                dns_cache_found = True
                if subprocess.call(restart_cmd):
                    print_failure("Flushing the DNS cache failed.")
                else:
                    print_success(success_msg)

        # Only meaningful on this generic-Linux path; Darwin/Windows
        # are handled above.
        if not dns_cache_found:
            print_failure("Unable to determine DNS management tool.")
2016-06-12 14:04:38 +02:00
2016-02-08 01:15:05 +01:00
2017-05-15 17:51:17 +02:00
# Hotfix since merging with an already existing
# hosts file leads to artifacts and duplicates
def remove_old_hosts_file():
    """Delete the previous hosts file (after an optional timestamped
    backup) and leave a fresh empty one in its place."""
    old_file_path = os.path.join(BASEDIR_PATH, "hosts")

    # Touch the file first so os.remove cannot fail on a missing file.
    open(old_file_path, "a").close()

    if settings["backup"]:
        # Timestamped copy marks when the list was last updated.
        stamp = time.strftime("%Y-%m-%d-%H-%M-%S")
        backup_file_path = os.path.join(BASEDIR_PATH,
                                        "hosts-{}".format(stamp))
        shutil.copy(old_file_path, backup_file_path)

    os.remove(old_file_path)

    # Create new empty hosts file.
    open(old_file_path, "a").close()
2013-02-17 21:51:49 +01:00
# End File Logic
2013-02-10 03:55:04 +01:00
2017-05-15 17:51:17 +02:00
2013-02-17 21:51:49 +01:00
# Helper Functions
2017-05-14 06:09:36 +02:00
def query_yes_no ( question , default = " yes " ) :
"""
Ask a yes / no question via raw_input ( ) and get answer from the user .
2016-12-18 16:55:35 +01:00
2017-05-14 06:09:36 +02:00
Inspired by the following implementation :
2016-12-18 16:55:35 +01:00
2017-05-14 06:09:36 +02:00
http : / / code . activestate . com / recipes / 577058
Parameters
- - - - - - - - - -
question : str
The question presented to the user .
default : str , default " yes "
The presumed answer if the user just hits < Enter > . It must be " yes " ,
" no " , or None ( means an answer is required of the user ) .
Returns
- - - - - - -
yes : Whether or not the user replied yes to the question .
2016-12-18 16:55:35 +01:00
"""
2017-05-14 06:09:36 +02:00
valid = { " yes " : " yes " , " y " : " yes " , " ye " : " yes " ,
" no " : " no " , " n " : " no " }
2016-12-18 16:55:35 +01:00
prompt = { None : " [y/n] " ,
" yes " : " [Y/n] " ,
" no " : " [y/N] " } . get ( default , None )
2017-05-14 06:09:36 +02:00
2016-12-18 16:55:35 +01:00
if not prompt :
raise ValueError ( " invalid default answer: ' %s ' " % default )
2017-05-14 06:09:36 +02:00
reply = None
while not reply :
2017-05-15 17:51:17 +02:00
sys . stdout . write ( colorize ( question , Colors . PROMPT ) + prompt )
2017-05-14 06:09:36 +02:00
2016-12-18 16:55:35 +01:00
choice = raw_input ( ) . lower ( )
2017-05-14 06:09:36 +02:00
reply = None
2016-12-18 16:55:35 +01:00
if default and not choice :
2017-05-14 06:09:36 +02:00
reply = default
2016-12-18 16:55:35 +01:00
elif choice in valid :
2017-05-14 06:09:36 +02:00
reply = valid [ choice ]
2016-12-18 16:55:35 +01:00
else :
2017-05-15 17:51:17 +02:00
print_failure ( " Please respond with ' yes ' or ' no ' "
" (or ' y ' or ' n ' ). \n " )
2017-05-14 06:09:36 +02:00
return reply == " yes "
2013-02-10 03:55:04 +01:00
2017-05-15 17:51:17 +02:00
def is_valid_domain_format(domain):
    """Return True unless *domain* is empty or looks like a URL /
    www-prefixed address; print guidance for rejected input."""
    if domain == "":
        print("You didn't enter a domain. Try again.")
        return False

    # Reject "www.", "www1."-style prefixes and http/https schemes.
    url_like = re.compile(r"www\d{0,3}[.]|https?")
    if url_like.match(domain):
        print("The domain " + domain +
              " is not valid. Do not include "
              "www.domain.com or http(s)://domain.com. Try again.")
        return False

    return True
2013-02-17 22:49:16 +01:00
2017-05-15 17:51:17 +02:00
2016-12-19 05:17:01 +01:00
# A version-independent glob( ... "/**/" ... )
2017-05-15 17:51:17 +02:00
def recursive_glob(stem, file_pattern):
    """Find *file_pattern* anywhere under *stem*, on any Python version.

    glob's recursive "**" support requires 3.5+, so older interpreters
    fall back to os.walk + fnmatch.
    """
    if sys.version_info >= (3, 5):
        return glob(stem + "/**/" + file_pattern, recursive=True)

    # Pre-3.5 fallback: walk the tree by hand.
    if stem == "*":
        stem = "."
    return [os.path.join(root, name)
            for root, _dirs, names in os.walk(stem)
            for name in fnmatch.filter(names, file_pattern)]
2013-02-17 22:49:16 +01:00
# Colors
2017-05-15 17:51:17 +02:00
class Colors(object):
    """ANSI escape sequences used to color terminal output."""
    PROMPT = "\033[94m"   # blue
    SUCCESS = "\033[92m"  # green
    FAIL = "\033[91m"     # red
    ENDC = "\033[0m"      # reset to default
2013-02-17 22:49:16 +01:00
2016-02-08 01:18:16 +01:00
def colorize(text, color):
    """Return *text* wrapped in *color*, followed by the reset code."""
    reset = Colors.ENDC
    return color + text + reset
def print_success(text):
    """Print *text* in the success (green) color."""
    print(colorize(text, Colors.SUCCESS))
2013-02-17 22:49:16 +01:00
2017-05-15 17:51:17 +02:00
def print_failure(text):
    """Print *text* in the failure (red) color."""
    print(colorize(text, Colors.FAIL))
2013-02-17 21:51:49 +01:00
# End Helper Functions
2013-02-10 03:55:04 +01:00
if __name__ == " __main__ " :
2016-12-18 16:55:35 +01:00
main ( )