subvenciones/scripts/calls.sh

100 lines
2.7 KiB
Bash
Executable File

#!/usr/bin/env bash
# Script-wide settings and constants.
# errexit: stop on the first unhandled failing command.
# errtrace: let functions and subshells inherit the ERR trap.
set -o errexit -o errtrace

# Value sent as the `rows` query parameter of the search request.
ROWS=50000000
# Page count; starts at 0 and is overwritten by getNumPages.
PAGES=0
# First 13 characters of "<seconds><nanoseconds>" (a millisecond timestamp
# with GNU date); sent as the `nd` query parameter.
ND=$(date '+%s%N' | cut -c 1-13)
# User-Agent header sent with every wget request.
USER_AGENT='Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0'
# Output and scratch file names, relative to the current directory.
F_CALLS='calls.csv'
F_TAR_GZ='calls.tar.gz'
F_COOKIES='cookies.txt'
# External programs that are checked for before any work starts.
APP=(
  wget
  jq
  tar
  gzip
)
#######################################
# Requests the site's front page once so that wget stores the session
# cookies it receives in the cookie jar file.
# Globals:   F_COOKIES (read) - cookie jar path written by wget
#            USER_AGENT (read) - User-Agent header value
# Arguments: none
# Outputs:   progress message on stdout; wget output is discarded
# Returns:   wget's exit status
#######################################
function getCookies() {
  echo "Getting cookies..."
  # -O /dev/null: only the cookies are needed; without it wget would leave
  # an index.html behind in the working directory on every run.
  wget -q --ca-certificate=AC_Componentes_Informaticos_SHA256.crt \
    --keep-session-cookies --save-cookies "$F_COOKIES" \
    --header "User-Agent: $USER_AGENT" \
    -O /dev/null 'https://www.infosubvenciones.es/' &> /dev/null
}
#######################################
# Downloads one page of the "convs" search results into page_NNN.json
# (NNN = zero-padded page number) in the current directory.
# Globals:   COOKIE (read, optional) - when set, sent verbatim as the
#              Cookie header instead of loading the cookie jar
#            F_COOKIES (read) - cookie jar used when COOKIE is unset
#            ND, ROWS (read) - values of the `nd` and `rows` query parameters
#            USER_AGENT (read) - User-Agent header value
# Arguments: $1 - page number to download
#            $2 - total number of pages (only used in the progress message)
# Outputs:   progress message on stdout
# Returns:   wget's exit status
#######################################
function fetchCalls() {
  local pageNumber=$1
  local totalPages=$2
  local outputFile
  printf -v outputFile 'page_%03d.json' "$pageNumber"
  local url="https://www.infosubvenciones.es/bdnstrans/busqueda?type=convs&_search=false&nd=$ND&rows=$ROWS&page=$pageNumber&sidx=4&sord=asc"

  # The two cookie sources are mutually exclusive; everything else about
  # the request is identical, so only this part of the command varies.
  local -a cookieArgs
  if [[ -z ${COOKIE+x} ]]; then
    cookieArgs=(--load-cookies "$F_COOKIES")
  else
    cookieArgs=(--header "Cookie: $COOKIE")
  fi

  echo "Downloading page $pageNumber of $totalPages..."
  wget -q --ca-certificate=AC_Componentes_Informaticos_SHA256.crt \
    --keep-session-cookies "${cookieArgs[@]}" \
    --header "User-Agent: $USER_AGENT" \
    -O "$outputFile" "$url"
}
# Exported so that child bash processes can call the function.
export -f fetchCalls
#######################################
# Downloads the first page and reads the paging information from it.
# Globals:   PAGES (written) - set to the `.total` field of page 1
# Arguments: none
# Outputs:   progress and totals on stdout
# Returns:   0 on success; calls errorHandler when `.total` is not a
#            positive integer
#######################################
function getNumPages() {
  local page=1
  local outputFile
  local records
  printf -v outputFile 'page_%03d.json' "$page"

  echo "Getting number of pages..."
  fetchCalls "$page" "$PAGES"

  # Assignments are kept separate from `local` so a failing jq is not
  # masked by the exit status of `local` itself.
  PAGES=$(jq -r '.total' "$outputFile")
  records=$(jq -r '.records' "$outputFile")
  echo "Total pages: $PAGES"
  echo "Total records: $records"

  # Reject anything that is not a positive integer ("null", "", "3.5", ...).
  # 10# forces base 10 so a zero-padded value is not parsed as octal.
  if [[ ! $PAGES =~ ^[0-9]+$ ]] || (( 10#$PAGES <= 0 )); then
    errorHandler "Cannot get number of pages" "$LINENO"
  fi
}
#######################################
# Downloads pages 2..PAGES, up to five at a time, by running fetchCalls
# in child bash processes started by xargs.
# Globals:   PAGES (read) - last page number to download
#            ND, ROWS, USER_AGENT, F_COOKIES, COOKIE (exported)
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   exit status of the seq | xargs pipeline
#######################################
function getAllCalls() {
  echo "Downloading all calls..."
  # The workers are separate bash processes: they receive the exported
  # fetchCalls function but only see variables that are exported too.
  # Exporting an unset COOKIE is harmless (it stays unset in the workers).
  export ND ROWS USER_AGENT F_COOKIES COOKIE
  # The page number is handed over as a positional argument instead of
  # being spliced into the command string; -I already implies one
  # invocation per input line, so no -n is needed.
  seq 2 "$PAGES" \
    | xargs -P 5 -I '{}' bash -c 'fetchCalls "$1" "$2"' _ '{}' "$PAGES"
}
#######################################
# Flattens every *.json file in the current directory into one CSV file:
# each element of `.rows` becomes one CSV line holding its values.
# Globals:   F_CALLS (read) - path of the CSV file to (over)write
# Arguments: none
# Outputs:   progress message on stdout; CSV rows into $F_CALLS
# Returns:   0 on success, 1 when there is no JSON file to convert,
#            otherwise the status of the last jq run
#######################################
function convertJson2Csv() {
  local file
  # The ./ prefix keeps a file name starting with "-" from being read as
  # a jq option. Files are processed in glob (sorted) order.
  local -a pages=(./*.json)

  echo "Converting JSON to CSV..."
  # With no match the glob stays literal; fail clearly instead of handing
  # the pattern itself to jq.
  if [[ ! -e ${pages[0]} ]]; then
    echo "No JSON files found to convert" >&2
    return 1
  fi
  for file in "${pages[@]}"; do
    jq -r '.rows[] | [.[]] | @csv' "$file"
  done > "$F_CALLS"
}
#######################################
# Packs the CSV file into a gzip-compressed tar archive.
# Globals:   F_TAR_GZ (read) - archive to create
#            F_CALLS (read) - file to put into the archive
# Arguments: none
# Outputs:   progress message on stdout
# Returns:   tar's exit status
#######################################
function compressData() {
  echo "Compressing data..."
  # Quoted so names containing spaces stay single arguments; "--" keeps a
  # member name starting with "-" from being parsed as an option.
  tar -czf "$F_TAR_GZ" -- "$F_CALLS"
}
#######################################
# Deletes the scratch files: every *.json file in the current directory
# and the cookie jar.
# Globals:   F_COOKIES (read) - cookie jar path; skipped when empty
# Arguments: none
# Outputs:   progress message on stdout
# Returns:   rm's exit status
#######################################
function cleanTempFiles() {
  echo "Cleaning temporary files..."
  # ${var:+...} drops the argument entirely when F_COOKIES is empty or
  # unset, so rm is never handed an empty file name.
  rm -f -- ./*.json ${F_COOKIES:+"$F_COOKIES"}
}
#######################################
# Reports an error, removes the temporary files and terminates the script.
# Arguments: $1 - error description (or an exit status)
#            $2 - line number to report
# Outputs:   the error message on stdout, then cleanTempFiles' output
# Returns:   never; exits the shell with status 1
#######################################
function errorHandler() {
  local reason=$1
  local lineNumber=$2
  printf 'Error: (%s) occurred on line %s\n' "$reason" "$lineNumber"
  cleanTempFiles
  exit 1
}
# Any command that fails outside a condition ends up in errorHandler,
# which reports the exit status and line, cleans up and exits.
trap 'errorHandler "$?" "$LINENO"' ERR
# Ctrl+C: clean up and exit with 130 (128 + SIGINT) so callers can tell
# the run was interrupted rather than completed.
trap 'printf "\nTerminated by Ctrl+c\n"; cleanTempFiles; exit 130' INT

# Check that every required program can be found on PATH before starting.
for element in "${APP[@]}"; do
  if ! command -v "$element" > /dev/null 2>&1; then
    errorHandler "$element - Maybe it is not installed on the system. Sorry but I can't continue" "$LINENO"
  fi
done

# COOKIE, when set (even to an empty string), replaces the cookie jar, so
# the cookie-fetching request is skipped.
if [[ -z ${COOKIE+x} ]]; then
  echo "Manual cookie is unset"
  getCookies
else
  echo "Manual cookie is set. Skipping get cookie step"
fi

# Pipeline: page count -> remaining pages -> CSV -> archive -> cleanup.
getNumPages
getAllCalls
convertJson2Csv
compressData
cleanTempFiles