hosts/updateReadme.py

#!/usr/bin/env python

# Script by Steven Black
# https://github.com/StevenBlack
#
# This Python script will update the readme files in this repo.

import json
import os
import sys
import time
from string import Template

# Project Settings
# Directory that contains this script (symlinks resolved via realpath).
BASEDIR_PATH = os.path.dirname(os.path.realpath(__file__))
# Template file that main() expands into every generated readme.
README_TEMPLATE = os.path.join(BASEDIR_PATH, 'readme_template.md')
# File name of the readme that main() writes into each "location" folder.
README_FILENAME = 'readme.md'
# JSON file holding the readme data. main() opens it by this bare name,
# so it is resolved against the current working directory.
README_DATA_FILENAME = "readmeData.json"

# Detecting Python 3 for version-dependent implementations
PY3 = sys.version_info >= (3, 0)


def main():
    """
    Regenerate the readme file of every entry in the readme data file.

    The JSON object stored in ``README_DATA_FILENAME`` is loaded and its
    keys are processed in a stable order. Every entry is expected to
    provide the ``"entries"``, ``"location"`` and ``"sourcesdata"`` keys
    that are read below. For each entry, ``README_TEMPLATE`` is copied
    line by line into ``<location>/README_FILENAME`` with the following
    placeholders replaced:

    * ``@GEN_DATE@`` - the current UTC date.
    * ``@EXTENSIONS@`` / ``@EXTENSIONS_HEADER@`` - text derived from the key.
    * ``@NUM_ENTRIES@`` - the entry count with thousands separators.
    * ``@SUBFOLDER@`` - the entry location with a trailing path separator.
    * ``@TOCROWS@`` - one table row per key in the data file.
    * ``@SOURCEROWS@`` - one table row per item of ``"sourcesdata"``.

    Raises
    ------
    IOError / OSError
        If the data file, the template or an output file cannot be opened.
    KeyError
        If an entry lacks one of the keys listed above, or a template
        field has no value during substitution.
    """

    toc_template = Template(
        '${description} | [Readme](https://github.com/StevenBlack/'
        'hosts/blob/master/${location}readme.md) | '
        '[link](https://raw.githubusercontent.com/StevenBlack/'
        'hosts/master/${location}hosts) | '
        '${fmtentries} | '
        '[link](http://sbc.io/hosts/${location}hosts)')

    with open(README_DATA_FILENAME, 'r') as f:
        data = json.load(f)

    # Sort by the number of hyphens in the key and then by the key string
    # itself. sorted() returns a list on both Python 2 and Python 3, so no
    # version-specific handling of dict.keys() is needed.
    keys = sorted(data, key=lambda item: (item.count("-"), item))

    toc_lines = []
    for key in keys:
        entry = data[key]
        entry["fmtentries"] = "{:,}".format(entry["entries"])
        if key == "base":
            entry["description"] = 'Unified hosts = **(adware + malware)**'
        else:
            entry["description"] = ('Unified hosts **+ ' +
                                    key.replace("-", " + ") + '**')

        toc_lines.append(toc_template.substitute(entry) + "\n")

    # The table of contents is identical in every readme: decode it once.
    toc_rows = decode_line("".join(toc_lines))

    # Fallback values so that sources with missing metadata still render.
    row_defaults = {
        "name": "",
        "description": "",
        "homeurl": "",
        "frequency": "",
        "url": "",
        "license": "",
        "issues": ""}

    source_template = Template(
        '${name} | ${description} |[link](${homeurl})'
        ' | [raw](${url}) | ${frequency} | ${license}  | [issues](${issues}) ')

    # Computed once so that every line of every readme shows the same date.
    gen_date = time.strftime("%B %d %Y", time.gmtime())

    for key in keys:
        entry = data[key]
        extensions = key.replace("-", ", ")
        extensions_str = "* Extensions: **" + extensions + "**."
        extensions_header = "with " + extensions + " extensions"

        source_lines = []
        for source in entry["sourcesdata"]:
            row = dict(row_defaults)
            row.update(source)
            source_lines.append(source_template.substitute(row) + "\n")
        source_rows = "".join(source_lines)

        # Placeholder/value pairs, applied to each template line in this
        # exact order. The values do not depend on the line, so they are
        # built once per key instead of once per template line.
        replacements = [
            ('@GEN_DATE@', gen_date),
            ('@EXTENSIONS@', decode_line(extensions_str)),
            ('@EXTENSIONS_HEADER@', decode_line(extensions_header)),
            ('@NUM_ENTRIES@', entry["fmtentries"]),
            ('@SUBFOLDER@', decode_line(os.path.join(entry["location"],
                                                     ''))),
            ('@TOCROWS@', toc_rows),
            ('@SOURCEROWS@', decode_line(source_rows)),
        ]

        readme_path = os.path.join(entry["location"], README_FILENAME)

        # Open the template first: if it is missing, the existing readme is
        # not truncated. Both handles are closed when the block exits.
        with open(README_TEMPLATE) as template, \
                open(readme_path, "wt") as out:
            for line in template:
                for placeholder, value in replacements:
                    line = line.replace(placeholder, value)
                out.write(decode_line(line))


def decode_line(line):
    """
    Normalize a line of text so it can be written on Python 2 and 3.

    On Python 3 the line is handed back untouched. On Python 2 the line
    is decoded from UTF-8 when possible, every en-dash (u"\\u2013") is
    swapped for an ASCII hyphen, and the result is converted to ``str``.

    Parameters
    ----------
    line : str
        The line of text to normalize.

    Returns
    -------
    decoded_str : str
        The normalized line.
    """

    if not PY3:
        # The biggest Python 2.x compatibility issue is the en-dash, which
        # shows up either as u"\u2013" or as the UTF-8 bytes
        # "\xe2\x80\x93".
        #
        # First try to turn UTF-8 encoded bytes into pure unicode. When
        # the text already holds u"\u2013", this raises a
        # UnicodeEncodeError, which is ignored on purpose.
        try:
            line = line.decode("UTF-8")
        except UnicodeEncodeError:
            pass

        # Swap the en-dash for a plain hyphen so the final decode below
        # can succeed.
        #
        # Additional "replace" calls can be added here for other unicode
        # characters that Python 2.x cannot handle.
        hyphenated = line.replace(u"\u2013", "-")
        return str(hyphenated.decode("UTF-8"))

    # Python 3.x has no unicode issues.
    return line


# Regenerate the readme files only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
First pass at updatereadme.py. 2016-03-23 05:35:26 +01:00			`#!/usr/bin/env python`

			`# Script by Steven Black`
			`# https://github.com/StevenBlack`
			`#`
			`# This Python script will update the readme files in this repo.`

Sorting of the dependencies 2018-08-10 15:52:20 +02:00			`import json`
First pass at updatereadme.py. 2016-03-23 05:35:26 +01:00			`import os`
			`import sys`
			`import time`
Sorting of the dependencies 2018-08-10 15:52:20 +02:00			`from string import Template`
First pass at updatereadme.py. 2016-03-23 05:35:26 +01:00
			`# Project Settings`
Patch all style errors in updateReadme.py 2017-05-15 21:13:39 +02:00			`BASEDIR_PATH = os.path.dirname(os.path.realpath(__file__))`
			`README_TEMPLATE = os.path.join(BASEDIR_PATH, 'readme_template.md')`
			`README_FILENAME = 'readme.md'`
First pass at updatereadme.py. 2016-03-23 05:35:26 +01:00			`README_DATA_FILENAME = "readmeData.json"`

Lift restriction on Python 3.x for makeHosts* 2017-05-16 05:57:14 +02:00			`# Detecting Python 3 for version-dependent implementations`
			`PY3 = sys.version_info >= (3, 0)`
First pass at updatereadme.py. 2016-03-23 05:35:26 +01:00

Lift restriction on Python 3.x for makeHosts* 2017-05-16 05:57:14 +02:00			`def main():`
Fix flake8 errors in updateReadme.py xref commit 8fb2df 2017-08-20 20:58:14 +02:00			`s = Template('${description} \| [Readme](https://github.com/StevenBlack/'`
			`'hosts/blob/master/${location}readme.md) \| '`
			`'[link](https://raw.githubusercontent.com/StevenBlack/'`
			`'hosts/master/${location}hosts) \| '`
Fix typo. 2017-08-20 18:08:28 +02:00			`'${fmtentries} \| '`
Patch all style errors in updateReadme.py 2017-05-15 21:13:39 +02:00			`'[link](http://sbc.io/hosts/${location}hosts)')`
First pass at updatereadme.py. 2016-03-23 05:35:26 +01:00			`with open(README_DATA_FILENAME, 'r') as f:`
Patch all style errors in updateReadme.py 2017-05-15 21:13:39 +02:00			`data = json.load(f)`
First pass at updatereadme.py. 2016-03-23 05:35:26 +01:00
Lift restriction on Python 3.x for makeHosts* 2017-05-16 05:57:14 +02:00			`if PY3:`
First pass at updatereadme.py. 2016-03-23 05:35:26 +01:00			`keys = list(data.keys())`
			`else:`
			`keys = data.keys()`

Document all functions in Python scripts 2017-05-18 05:17:37 +02:00			`# Sort by the number of en-dashes in the key`
			`# and then by the key string itself.`
			`keys.sort(key=lambda item: (item.count("-"), item))`
Added metadata to Sinfonietta’s hosts source files. 2016-12-11 19:44:51 +01:00
Patch all style errors in updateReadme.py 2017-05-15 21:13:39 +02:00			`toc_rows = ""`
Tweaks to updateReadme.py 2016-03-24 06:08:48 +01:00			`for key in keys:`
Redme tweaks, format the numbers in the table. 2016-03-24 22:55:23 +01:00			`data[key]["fmtentries"] = "{:,}".format(data[key]["entries"])`
Tweaks to updateReadme.py 2016-03-24 06:08:48 +01:00			`if key == "base":`
			`data[key]["description"] = 'Unified hosts = (adware + malware)'`
			`else:`
Patch all style errors in updateReadme.py 2017-05-15 21:13:39 +02:00			`data[key]["description"] = ('Unified hosts **+ ' +`
			`key.replace("-", " + ") + '**')`
Tweaks to updateReadme.py 2016-03-24 06:08:48 +01:00
Patch all style errors in updateReadme.py 2017-05-15 21:13:39 +02:00			`toc_rows += s.substitute(data[key]) + "\n"`
Tweaks to updateReadme.py 2016-03-24 06:08:48 +01:00
Patch all style errors in updateReadme.py 2017-05-15 21:13:39 +02:00			`row_defaults = {`
Create and fill a table of sources used for the base hosts file. 2016-12-11 18:48:57 +01:00			`"name": "",`
			`"description": "",`
			`"homeurl": "",`
			`"frequency": "",`
			`"issues": "",`
Add known licenses data to json metadata 2017-10-25 05:17:19 +02:00			`"url": "",`
Include source data issues links to help folks to report problems. 2018-03-18 04:04:57 +01:00			`"license": "",`
Fix lint issue. 2018-03-18 04:41:24 +01:00			`"issues": ""}`
Create and fill a table of sources used for the base hosts file. 2016-12-11 18:48:57 +01:00
Patch all style errors in updateReadme.py 2017-05-15 21:13:39 +02:00			`t = Template('${name} \| ${description} \|[link](${homeurl})'`
Include source data issues links to help folks to report problems. 2018-03-18 04:04:57 +01:00			`' \| [raw](${url}) \| ${frequency} \| ${license} \| [issues](${issues}) ')`
Tweaks to updateReadme.py 2016-03-24 06:08:48 +01:00
First pass at updatereadme.py. 2016-03-23 05:35:26 +01:00			`for key in keys:`
Patch all style errors in updateReadme.py 2017-05-15 21:13:39 +02:00			`extensions = key.replace("-", ", ")`
			`extensions_str = "* Extensions: " + extensions + "."`
			`extensions_header = "with " + extensions + " extensions"`

			`source_rows = ""`
			`source_list = data[key]["sourcesdata"]`

			`for source in source_list:`
			`this_row = {}`
			`this_row.update(row_defaults)`
			`this_row.update(source)`
			`source_rows += t.substitute(this_row) + "\n"`

			`with open(os.path.join(data[key]["location"],`
			`README_FILENAME), "wt") as out:`
First pass at updatereadme.py. 2016-03-23 05:35:26 +01:00			`for line in open(README_TEMPLATE):`
Patch all style errors in updateReadme.py 2017-05-15 21:13:39 +02:00			`line = line.replace('@GEN_DATE@', time.strftime("%B %d %Y",`
			`time.gmtime()))`
Lift restriction on Python 3.x for makeHosts* 2017-05-16 05:57:14 +02:00			`line = line.replace('@EXTENSIONS@',`
			`decode_line(extensions_str))`
			`line = line.replace('@EXTENSIONS_HEADER@',`
			`decode_line(extensions_header))`
Patch all style errors in updateReadme.py 2017-05-15 21:13:39 +02:00			`line = line.replace('@NUM_ENTRIES@',`
			`"{:,}".format(data[key]["entries"]))`
			`line = line.replace('@SUBFOLDER@',`
Lift restriction on Python 3.x for makeHosts* 2017-05-16 05:57:14 +02:00			`decode_line(os.path.join(`
			`data[key]["location"], '')))`
			`line = line.replace('@TOCROWS@',`
			`decode_line(toc_rows))`
			`line = line.replace('@SOURCEROWS@',`
			`decode_line(source_rows))`
			`out.write(decode_line(line))`


			`def decode_line(line):`
			`"""`
			`Python 2 compatible method for decoding unicode lines.`

			`Parameters`
			`----------`
			`line : str`
			`The unicode string to decode.`

			`Returns`
			`-------`
			`decoded_str : str`
			`Decoded unicode string.`
			`"""`

			`# Python 3.x has no unicode issues.`
			`if PY3:`
			`return line`

			`# The biggest Python 2.x compatibility issue is the decoding of the`
			`# en-dash. It either takes the form of u"\u2013" or "\xe2\x80\x93."`
			`#`
			`# This attempts to convert "\xe2\x80\x93" to u"\u2013" if necessary.`
			`# If the character is already in the form of u"\u2013," this will`
			`# raise an UnicodeEncodeError.`
			`#`
			`# In general, this line of code will allow us to convert unicode,`
			`# UTF-8 encoded characters into pure unicode.`
			`try:`
			`line = line.decode("UTF-8")`
			`except UnicodeEncodeError:`
			`pass`

			`# Replace u"\u2013" with the en-dash, so we now can decode.`
			`#`
			`# We can add additional "replace" lines in case there are other unicode`
			`# literals that Python 2.x cannot handle.`
			`line = line.replace(u"\u2013", "-")`
			`return str(line.decode("UTF-8"))`
Patch all style errors in updateReadme.py 2017-05-15 21:13:39 +02:00
First pass at updatereadme.py. 2016-03-23 05:35:26 +01:00
			`if __name__ == "__main__":`
			`main()`