hosts/updateReadme.py

145 lines
4.7 KiB
Python
Raw Normal View History

2016-03-23 05:35:26 +01:00
#!/usr/bin/env python
# Script by Steven Black
# https://github.com/StevenBlack
#
# This Python script will update the readme files in this repo.
2018-08-10 15:52:20 +02:00
import json
2016-03-23 05:35:26 +01:00
import os
import sys
import time
2018-08-10 15:52:20 +02:00
from string import Template
2016-03-23 05:35:26 +01:00
# Project settings.
# The template path is anchored to the directory that contains this script
# (symlinks resolved), so it does not depend on the current working directory.
BASEDIR_PATH = os.path.dirname(os.path.realpath(__file__))
README_TEMPLATE = os.path.join(BASEDIR_PATH, "readme_template.md")
README_FILENAME = "readme.md"
2016-03-23 05:35:26 +01:00
# JSON file that main() loads to learn about every readme it must generate.
README_DATA_FILENAME = "readmeData.json"

# True when running under Python 3 or later; used to pick
# version-dependent implementations.
PY3 = sys.version_info[0] >= 3
2016-03-23 05:35:26 +01:00
def main():
    """
    Regenerate the readme file of every hosts variant.

    The JSON document in README_DATA_FILENAME maps a variant key (for
    example ``base`` or a hyphen-separated list of extensions) to a record
    holding at least ``entries``, ``location`` and ``sourcesdata``.  For each
    variant, README_TEMPLATE is rendered into
    ``<location>/README_FILENAME`` by substituting these placeholders:

    ``@GEN_DATE@``, ``@EXTENSIONS@``, ``@EXTENSIONS_HEADER@``,
    ``@NUM_ENTRIES@``, ``@SUBFOLDER@``, ``@TOCROWS@`` and ``@SOURCEROWS@``.

    Notes
    -----
    README_DATA_FILENAME and each ``location`` are opened exactly as given,
    so relative paths are resolved against the current working directory.
    """
    toc_template = Template(
        '${description} | [Readme](https://github.com/StevenBlack/'
        'hosts/blob/master/${location}readme.md) | '
        '[link](https://raw.githubusercontent.com/StevenBlack/'
        'hosts/master/${location}hosts) | '
        '${fmtentries} | '
        '[link](http://sbc.io/hosts/${location}hosts)')

    with open(README_DATA_FILENAME, 'r') as f:
        data = json.load(f)

    # Sort by the number of hyphens in the key (i.e. by how many extensions
    # the variant combines) and then by the key string itself.  sorted()
    # returns a list on both Python 2 and Python 3.
    keys = sorted(data, key=lambda item: (item.count("-"), item))

    # Table-of-contents rows are identical in every generated readme, so
    # they are built once up front.
    toc_lines = []
    for key in keys:
        entry = data[key]
        entry["fmtentries"] = "{:,}".format(entry["entries"])
        if key == "base":
            entry["description"] = 'Unified hosts = **(adware + malware)**'
        else:
            entry["description"] = ('Unified hosts **+ ' +
                                    key.replace("-", " + ") + '**')
        toc_lines.append(toc_template.substitute(entry) + "\n")
    toc_rows = "".join(toc_lines)

    # Fallback values so that a source record lacking a field still renders.
    row_defaults = {
        "name": "",
        "description": "",
        "homeurl": "",
        "frequency": "",
        "issues": "",
        "url": "",
        "license": ""}

    source_template = Template(
        '${name} | ${description} |[link](${homeurl})'
        ' | [raw](${url}) | ${frequency} | ${license} | [issues](${issues}) ')

    # Read the template a single time and close the handle deterministically
    # instead of re-opening (and never closing) it for every variant.
    with open(README_TEMPLATE) as template_file:
        template_lines = template_file.readlines()

    # One timestamp for the whole run keeps all generated readmes consistent.
    gen_date = time.strftime("%B %d %Y", time.gmtime())

    for key in keys:
        entry = data[key]
        extensions = key.replace("-", ", ")
        extensions_str = "* Extensions: **" + extensions + "**."
        extensions_header = "with " + extensions + " extensions"

        source_lines = []
        for source in entry["sourcesdata"]:
            row = dict(row_defaults)
            row.update(source)
            source_lines.append(source_template.substitute(row) + "\n")
        source_rows = "".join(source_lines)

        # Placeholder/value pairs, applied to every template line in this
        # order.  Values are computed once per variant, not once per line.
        replacements = [
            ('@GEN_DATE@', gen_date),
            ('@EXTENSIONS@', decode_line(extensions_str)),
            ('@EXTENSIONS_HEADER@', decode_line(extensions_header)),
            ('@NUM_ENTRIES@', "{:,}".format(entry["entries"])),
            # Joining with '' appends a trailing path separator.
            ('@SUBFOLDER@', decode_line(os.path.join(entry["location"], ''))),
            ('@TOCROWS@', decode_line(toc_rows)),
            ('@SOURCEROWS@', decode_line(source_rows)),
        ]

        readme_path = os.path.join(entry["location"], README_FILENAME)
        with open(readme_path, "wt") as out:
            for line in template_lines:
                for placeholder, value in replacements:
                    line = line.replace(placeholder, value)
                out.write(decode_line(line))


def decode_line(line):
    """
    Normalise a line of text so it can be handled as a native ``str``.

    Parameters
    ----------
    line : str
        The text to normalise.

    Returns
    -------
    decoded_str : str
        On Python 3, ``line`` itself, unchanged.  On Python 2, the text with
        every en-dash replaced by an ASCII hyphen, converted to ``str``.
    """

    if not PY3:
        # On Python 2 the en-dash may arrive either as the UTF-8 byte
        # sequence "\xe2\x80\x93" or as the unicode literal u"\u2013".
        #
        # Decoding turns the byte form into unicode.  Text that is already
        # unicode and contains non-ASCII characters raises
        # UnicodeEncodeError here, in which case it is used as it is.
        try:
            text = line.decode("UTF-8")
        except UnicodeEncodeError:
            text = line

        # Swap the en-dash for a plain hyphen so the final conversion below
        # succeeds.  Further "replace" calls can be added here for other
        # unicode characters that Python 2.x cannot handle.
        text = text.replace(u"\u2013", "-")

        return str(text.decode("UTF-8"))

    # Python 3.x has no unicode issues.
    return line


2016-03-23 05:35:26 +01:00
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()