#!/usr/bin/env bash
#
# Downloads the "convs" search results from www.infosubvenciones.es as JSON
# pages, flattens them into a single CSV file and packs that CSV into a
# gzip-compressed tarball.
#
# Usage:
#   ./script.sh                 # obtain session cookies automatically
#   COOKIE='k=v; ...' ./script.sh   # use a manually supplied Cookie header
#
# Requires: wget, jq, tar, gzip, and the CA certificate file named in CA_CERT
# in the current working directory.

set -eEuo pipefail

PAGES=0
F_CALLS='calls.csv'
F_TAR_GZ='calls.tar.gz'
APP=(wget jq tar gzip)

# Everything fetchCalls reads must be exported: getAllCalls runs it inside
# child `bash -c` processes, which only see exported functions and variables.
export ROWS=50000000
# First 13 digits of seconds+nanoseconds since the epoch (millisecond stamp).
ND=$(date +%s%N | cut -b1-13)
export ND
export USER_AGENT='Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0'
export F_COOKIES='cookies.txt'
export CA_CERT='AC_Componentes_Informaticos_SHA256.crt'

#######################################
# Visits the site's landing page to obtain session cookies.
# Globals:   CA_CERT, F_COOKIES, USER_AGENT (read)
# Outputs:   progress message on stdout; cookies written to F_COOKIES
# Returns:   wget's exit status
#######################################
getCookies() {
  echo "Getting cookies..."
  # -O /dev/null: only the cookies matter; do not leave index.html behind.
  wget -q \
    --ca-certificate="$CA_CERT" \
    --keep-session-cookies \
    --save-cookies "$F_COOKIES" \
    --header "User-Agent: $USER_AGENT" \
    -O /dev/null \
    'https://www.infosubvenciones.es/' &> /dev/null
}

#######################################
# Downloads one result page into page_NNN.json.
# Globals:   CA_CERT, COOKIE (optional), F_COOKIES, ND, ROWS, USER_AGENT (read)
# Arguments: $1 - page number to download
#            $2 - total number of pages (used for the progress message only)
# Outputs:   progress message on stdout; JSON written to page_NNN.json
# Returns:   wget's exit status
#######################################
fetchCalls() {
  local page=$1
  local total=$2
  local outputFile
  printf -v outputFile 'page_%03d.json' "$page"
  local url="https://www.infosubvenciones.es/bdnstrans/busqueda?type=convs&_search=false&nd=${ND}&rows=${ROWS}&page=${page}&sidx=4&sord=asc"

  # A manually supplied COOKIE takes precedence over the saved cookie jar.
  local -a auth
  if [[ -z "${COOKIE+x}" ]]; then
    auth=(--load-cookies "$F_COOKIES")
  else
    auth=(--header "Cookie: $COOKIE")
  fi

  echo "Downloading page $page of $total..."
  wget -q \
    --ca-certificate="$CA_CERT" \
    --keep-session-cookies \
    "${auth[@]}" \
    --header "User-Agent: $USER_AGENT" \
    -O "$outputFile" \
    "$url"
}
export -f fetchCalls

#######################################
# Downloads page 1 and reads the page and record totals from it.
# Globals:   PAGES (written)
# Outputs:   totals on stdout
# Returns:   does not return if the page count is not a positive integer
#######################################
getNumPages() {
  local page=1
  local outputFile
  printf -v outputFile 'page_%03d.json' "$page"

  echo "Getting number of pages..."
  fetchCalls "$page" "$PAGES"

  PAGES=$(jq '.total' "$outputFile")
  # Declared separately so a jq failure is not masked by `local`.
  local records
  records=$(jq '.records' "$outputFile")

  echo "Total pages: $PAGES"
  echo "Total records: $records"

  # Rejects 0, negatives, "null" and any other non-numeric value.
  if ! [[ "$PAGES" =~ ^[1-9][0-9]*$ ]]; then
    errorHandler "Cannot get number of pages" "$LINENO"
  fi
}

#######################################
# Downloads pages 2..PAGES, up to five at a time.
# Globals:   PAGES (read)
#######################################
getAllCalls() {
  echo "Downloading all calls..."
  # The page number is passed as a positional argument instead of being
  # substituted into the command string.
  seq 2 "$PAGES" \
    | xargs -P 5 -I{} bash -c 'fetchCalls "$1" "$2"' _ {} "$PAGES"
}

#######################################
# Flattens the rows of every downloaded page into one CSV file.
# Globals:   F_CALLS (read)
#######################################
convertJson2Csv() {
  echo "Converting JSON to CSV..."
  local file
  # Only the page files this script created, not every *.json in the directory.
  for file in page_*.json; do
    jq -r '.rows[] | [.[]] | @csv' "$file"
  done > "$F_CALLS"
}

#######################################
# Packs the CSV file into a gzip-compressed tarball.
# Globals:   F_CALLS, F_TAR_GZ (read)
#######################################
compressData() {
  echo "Compressing data..."
  tar -czf "$F_TAR_GZ" "$F_CALLS"
}

#######################################
# Removes the downloaded page files and the cookie jar.
# Globals:   F_COOKIES (read)
#######################################
cleanTempFiles() {
  echo "Cleaning temporary files..."
  rm -f -- ./page_*.json "$F_COOKIES"
}

#######################################
# Reports an error, cleans up and terminates the script with status 1.
# Arguments: $1 - error description or exit status
#            $2 - line number where the error occurred
# Outputs:   error message on stderr
#######################################
errorHandler() {
  echo "Error: ($1) occurred on line $2" >&2
  cleanTempFiles
  exit 1
}

trap 'errorHandler $? $LINENO' ERR
trap 'printf "\nTerminated by Ctrl+c\n" >&2; cleanTempFiles; exit 130' INT

#######################################
# Checks dependencies and runs the download / convert / compress pipeline.
#######################################
main() {
  local element
  for element in "${APP[@]}"; do
    if ! command -v "$element" > /dev/null 2>&1; then
      errorHandler "$element - Maybe it is not installed on the system. Sorry but I can't continue" "$LINENO"
    fi
  done

  if [[ -z "${COOKIE+x}" ]]; then
    echo "Manual cookie is unset"
    getCookies
  else
    echo "Manual cookie is set. Skipping get cookie step"
  fi

  getNumPages
  getAllCalls
  convertJson2Csv
  compressData
  cleanTempFiles
}

main "$@"