websegura/crawl.sh
2021-02-02 23:00:14 +01:00

46 lines
1.4 KiB
Bash
Executable File

#!/usr/bin/env bash
# TODO: Sanitize the URLs we read so they contain no slashes, backslashes or
# "https:" prefix, since those cause errors when writing the result files to disk.
# Collect the URL of every site to scan from the JSON data files.
# Each file is read with the jq filter `.webs[].url`, i.e. it is expected to
# hold a top-level "webs" array whose entries carry a "url" field.
# Result: SITES, a newline-separated list that the loops below word-split.
SITES="" # start clean so a SITES value inherited from the environment cannot leak in
for jsons in _data/general.json _data/provincias/*.json _data/comunidades/*.json; do
  # An unmatched glob stays literal, so this test also skips empty directories.
  [[ -f "$jsons" ]] || continue
  # On a jq error keep whatever it managed to print, but say which file failed.
  SITES_PR=$(jq -r '.webs[].url' "$jsons") \
    || printf 'warning: jq failed on %s; its site list may be incomplete\n' "$jsons" >&2
  SITES="${SITES}
${SITES_PR}"
done
# Ask the Mozilla HTTP Observatory to (re)scan every collected site.
# see https://github.com/mozilla/http-observatory/blob/master/httpobs/docs/api.md
# Split SITES on whitespace into an array; unlike an unquoted `for ... in $SITES`
# this does not glob-expand entries that contain `*`, `?` or `[`.
# `read` returns non-zero when it hits end of input, hence the `|| true`.
read -r -d '' -a scan_hosts <<< "${SITES:-}" || true
for site in "${scan_hosts[@]}"; do
  echo "Scanning $site using Mozilla HTTP Observatory API"
  # The API documents `rescan` as a POST (form) parameter, so it is sent in the
  # request body instead of the query string; --data also makes curl use POST.
  curl -s --data 'rescan=true' \
    "https://http-observatory.security.mozilla.org/api/v1/analyze?host=${site}"
done
# Pause before the results are fetched, so the Observatory has time to scan.
# XXX polling until mozilla finishes scanning the site might be more elegant
# The wait defaults to 60 seconds; set CRAWL_WAIT_SECONDS to override it.
sleep "${CRAWL_WAIT_SECONDS:-60}"
# Make sure the output directories exist, then drop the previous run's files.
# -f keeps rm quiet when a glob matches nothing (e.g. on the very first run).
mkdir -p _data/results/history
rm -f -- _data/results/*.json
rm -f -- _data/results/history/*.json
# Download each site's scan history and latest scan result, pretty-printed by
# jq, into _data/results/history/ and _data/results/ respectively. The file
# name is the site with every "." replaced by "!", plus ".json".
# Split SITES on whitespace without glob-expanding the entries; `read` returns
# non-zero when it hits end of input, hence the `|| true`.
read -r -d '' -a result_hosts <<< "${SITES:-}" || true
for site in "${result_hosts[@]}"; do
  result_name="${site//./!}.json"
  echo "Requesting $site scan history"
  # -G issues a GET with the URL-encoded host appended as the query string.
  curl -s -G --data-urlencode "host=${site}" \
    "https://http-observatory.security.mozilla.org/api/v1/getHostHistory" \
    | jq . > "_data/results/history/${result_name}"
  echo "Requesting $site scan results"
  curl -s -G --data-urlencode "host=${site}" \
    "https://http-observatory.security.mozilla.org/api/v1/analyze" \
    | jq . > "_data/results/${result_name}"
done