diff --git a/readme.md b/readme.md index 218d466af..b0e26f743 100644 --- a/readme.md +++ b/readme.md @@ -2,7 +2,7 @@ This repo consolidates several reputable `hosts` files and consolidates them into a single hosts file that you can use. -**Currently this hosts file contains 24245 unique entries.** +**Currently this hosts file contains 22863 unique entries.** ## Source of host data amalgamated here diff --git a/readme_template.md b/readme_template.md new file mode 100644 index 000000000..0dd390519 --- /dev/null +++ b/readme_template.md @@ -0,0 +1,77 @@ +# Amalgamated hosts file + +This repo consolidates several reputable `hosts` files into a single hosts file that you can use. + +**Currently this hosts file contains @NUM_ENTRIES@ unique entries.** + +## Source of host data amalgamated here + +Currently the `hosts` files from the following locations are amalgamated: + +* MVPs.org Hosts file at [http://winhelp2002.mvps.org/hosts.htm](http://winhelp2002.mvps.org/hosts.htm), updated monthly, or thereabouts. +* Dan Pollock at [http://someonewhocares.org/hosts/](http://someonewhocares.org/hosts/) updated regularly. +* My own small list in raw form [here](https://raw.github.com/StevenBlack/hosts/master/data/StevenBlack/hosts). + +You can add any additional sources you'd like under the data/ directory. Provide a copy of the current `hosts` file and a file called +update.info with the URL to the `hosts` file source. This will allow updateHostsFile.py to automatically update your source. + +## Using updateHostsFile.py + +This Python script will generate a unique hosts file based on the sources provided. You can either have the script go out and fetch an updated version over the web (defined by the update.info text file in the source's directory), or have it use the `hosts` file you already have checked into your source's data folder. + +Usage + + python updateHostsFile.py + +## What is a hosts file?
+ +A hosts file, named `hosts` (with no file extension), is a plain-text file used by all operating systems to map hostnames to IP addresses. + +In most operating systems, the `hosts` file takes precedence over `DNS`. Therefore, if a host name is resolved by the `hosts` file, the request never leaves your computer. + +Having a smart `hosts` file goes a long way towards blocking malware, adware, and other irritants. + +For example, to nullify requests to some doubleclick.net servers, adding these lines to your hosts file will do it: + + # block doubleClick's servers + 127.0.0.1 ad.ae.doubleclick.net + 127.0.0.1 ad.ar.doubleclick.net + 127.0.0.1 ad.at.doubleclick.net + 127.0.0.1 ad.au.doubleclick.net + 127.0.0.1 ad.be.doubleclick.net + # etc... + + + +## Location of your hosts file +To modify your current `hosts` file, look for it in the following places and modify it with a text editor. + +**Mac OS X, iOS, Android, Linux**: the `/etc/hosts` file. + +**Windows**: the `%SystemRoot%\system32\drivers\etc\hosts` file. + +## Reloading hosts file +Your operating system will cache DNS lookups. You can either reboot or run the following commands to manually flush your DNS cache once the new hosts file is in place.
+ +### Mac OS X +Open a Terminal and run: + +`dscacheutil -flushcache` + +### Windows +Open a Command Prompt: + +**Windows XP**: Start -> Run -> `cmd` + +**Windows Vista, 7**: Start Button -> type `cmd` -> right-click Command Prompt -> "Run as Administrator" + +**Windows 8**: Start -> Swipe Up -> All Apps -> Windows System -> right-click Command Prompt -> "Run as Administrator" + +and run: + +`ipconfig /flushdns` + +### Linux +Open a Terminal and run: + +`/etc/rc.d/init.d/nscd restart` \ No newline at end of file diff --git a/updateHostsFile.py b/updateHostsFile.py index b5e6d9eaa..6b2dcf83f 100644 --- a/updateHostsFile.py +++ b/updateHostsFile.py @@ -19,6 +19,8 @@ DATA_PATH = BASEDIR_PATH + '/data' DATA_FILENAMES = 'hosts' UPDATE_URL_FILENAME = 'update.info' SOURCES = os.listdir(DATA_PATH) +README_TEMPLATE = BASEDIR_PATH + '/readme_template.md' +README_FILE = BASEDIR_PATH + '/readme.md' # Exclusions EXCLUSION_PATTERN = '([a-zA-Z\d-]+\.){0,}' #append domain the end @@ -28,7 +30,7 @@ COMMON_EXCLUSIONS = ['hulu.com'] # Global vars exclusionRegexs = [] -duplicatesRemoved = 0; +numberOfRules = 0 def main(): promptForUpdate() @@ -36,7 +38,8 @@ def main(): mergeFile = createInitialFile() finalFile = removeDups(mergeFile) finalizeFile(finalFile) - printSuccess('Success! Your shiny new hosts file has been prepared.') + updateReadme(numberOfRules) + printSuccess('Success! 
Your shiny new hosts file has been prepared.\nIt contains ' + str(numberOfRules) + ' unique entries.') print 'Copy the generated file to /etc/hosts or %SystemRoot%\system32\drivers\etc\hosts' # Prompt the User @@ -134,7 +137,8 @@ def createInitialFile(): return mergeFile def removeDups(mergeFile): - global duplicatesRemoved + global numberOfRules + finalFile = open(BASEDIR_PATH + '/hosts', 'w+b') mergeFile.seek(0) # reset file pointer @@ -149,12 +153,10 @@ def removeDups(mergeFile): if strippedRule not in rules_seen: finalFile.write(line) rules_seen.add(strippedRule) - else: - duplicatesRemoved += 1 + numberOfRules += 1 mergeFile.close() - printSuccess('Removed ' + str(duplicatesRemoved) + ' duplicates from the merged file') return finalFile def finalizeFile(finalFile): @@ -172,7 +174,7 @@ def stripRule(line): return splitLine[0] + ' ' + splitLine[1] def writeOpeningHeader(finalFile): - global duplicatesRemoved + global numberOfRules finalFile.seek(0) #reset file pointer fileContents = finalFile.read(); #save content finalFile.seek(0) #write at the top @@ -183,10 +185,15 @@ def writeOpeningHeader(finalFile): for source in SOURCES: finalFile.write('# ' + source + '\n') finalFile.write('#\n') - finalFile.write('# Take Note:\n') - finalFile.write('# Merging these sources produced ' + str(duplicatesRemoved) + ' duplicates\n') + finalFile.write('# Merging these sources produced ' + str(numberOfRules) + ' unique entries\n') finalFile.write('# ===============================================================\n') finalFile.write(fileContents) + +def updateReadme(numberOfRules): + with open(README_FILE, "wt") as out: + for line in open(README_TEMPLATE): + out.write(line.replace('@NUM_ENTRIES@', str(numberOfRules))) + # End File Logic # Helper Functions