#!/usr/bin/env bash
#
# Downloads every page of the "concs" (concessions) search from
# https://www.infosubvenciones.es, flattens the JSON rows into a CSV,
# extracts the rows whose 10th column matches a legal-entity ID pattern,
# and packs both CSV files into a .tar.gz archive.
#
# Requirements: wget, jq, csvgrep, tar, gzip, and the CA certificate file
# named in F_CA_CERT present in the current working directory.
#
# Environment:
#   COOKIE  Optional. When set, it is sent verbatim as the "Cookie:" header
#           and the automatic cookie retrieval step is skipped.
#
# Outputs (in the current directory): concessions.csv, legal.csv,
# concessions.tar.gz. Temporary page_NNN.json files and the cookie jar are
# removed on success, on error and on Ctrl+C.

set -eEuo pipefail

ROWS=50000 # rows requested per page
PAGES=0    # total page count; filled in by getNumPages
# Millisecond timestamp sent as the "nd" query parameter.
ND=$(date +%s%N | cut -b1-13)
USER_AGENT='Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0'
F_CONCESSIONS='concessions.csv'
F_LEGAL='legal.csv'
F_TAR_GZ='concessions.tar.gz'
F_COOKIES='cookies.txt'
F_CA_CERT='AC_Componentes_Informaticos_SHA256.crt'
APP=(wget jq csvgrep tar gzip)
readonly ROWS ND USER_AGENT F_CONCESSIONS F_LEGAL F_TAR_GZ F_COOKIES F_CA_CERT

# fetchConcessions is executed in child bash processes started by xargs, so
# every global it reads has to live in the environment, not only in this shell.
export ROWS ND USER_AGENT F_COOKIES F_CA_CERT

#######################################
# Visits the site's front page to obtain session cookies.
# Globals:   F_CA_CERT, F_COOKIES, USER_AGENT (read)
# Outputs:   writes the cookie jar to $F_COOKIES
# Returns:   wget's exit status
#######################################
getCookies() {
  echo "Getting cookies..."
  # -O /dev/null: only the cookies are wanted; without it wget would leave an
  # index.html (then index.html.1, ...) behind in the working directory.
  wget -q \
    --ca-certificate="$F_CA_CERT" \
    --keep-session-cookies \
    --save-cookies "$F_COOKIES" \
    --header "User-Agent: $USER_AGENT" \
    -O /dev/null \
    'https://www.infosubvenciones.es/' &> /dev/null
}

#######################################
# Downloads one result page into page_NNN.json.
# Globals:   ND, ROWS, USER_AGENT, F_CA_CERT, F_COOKIES, COOKIE (read)
# Arguments: $1 - page number to download
#            $2 - total number of pages (used for the progress message only)
# Returns:   wget's exit status
#######################################
fetchConcessions() {
  local page=$1
  local total=$2
  local outputFile
  printf -v outputFile 'page_%03d.json' "$page"
  local url="https://www.infosubvenciones.es/bdnstrans/busqueda?type=concs&_search=false&nd=${ND}&rows=${ROWS}&page=${page}&sidx=8&sord=asc"

  # A manually supplied COOKIE takes precedence over the saved cookie jar.
  local -a session
  if [[ -z "${COOKIE+x}" ]]; then
    session=(--load-cookies "$F_COOKIES")
  else
    session=(--header "Cookie: $COOKIE")
  fi

  echo "Downloading page $page of $total..."
  wget -q \
    --ca-certificate="$F_CA_CERT" \
    --keep-session-cookies \
    "${session[@]}" \
    --header "User-Agent: $USER_AGENT" \
    -O "$outputFile" \
    "$url"
}
export -f fetchConcessions

#######################################
# Downloads page 1 and reads the total page and record counts from it.
# Globals:   PAGES (written)
# Outputs:   progress and totals on stdout
# Returns:   exits through errorHandler when the page count is not a
#            positive integer
#######################################
getNumPages() {
  local page=1
  local outputFile
  printf -v outputFile 'page_%03d.json' "$page"

  echo "Getting number of pages..."
  fetchConcessions "$page" "$PAGES"

  # Assign separately from 'local' so a jq failure is not masked.
  local records
  PAGES=$(jq -r '.total' "$outputFile")
  records=$(jq -r '.records' "$outputFile")

  echo "Total pages: $PAGES"
  echo "Total records: $records"

  # Reject null / non-numeric values before doing arithmetic on them.
  if ! [[ "$PAGES" =~ ^[0-9]+$ ]] || ((10#$PAGES <= 0)); then
    errorHandler "Cannot get number of pages" "$LINENO"
  fi
  PAGES=$((10#$PAGES))
}

#######################################
# Downloads pages 2..PAGES, up to five at a time (page 1 is already on disk).
# Globals:   PAGES (read)
# Returns:   xargs' exit status (non-zero if any download failed)
#######################################
getAllConcessions() {
  echo "Downloading all concessions..."
  if ((PAGES < 2)); then
    return 0
  fi
  # The page number is handed over as a positional argument instead of being
  # spliced into the command string.
  seq 2 "$PAGES" \
    | xargs -P 5 -I '{}' bash -c 'fetchConcessions "$1" "$2"' _ '{}' "$PAGES"
}

#######################################
# Flattens the "rows" of every downloaded page into one CSV file.
# Globals:   F_CONCESSIONS (read)
# Outputs:   writes $F_CONCESSIONS
#######################################
convertJson2Csv() {
  echo "Converting JSON to CSV..."
  # Only the page files this script created, not every *.json in the directory.
  jq -r '.rows[] | [.[]] | @csv' page_*.json > "$F_CONCESSIONS"
}

#######################################
# Keeps the rows whose 10th column matches the ID pattern below.
# NOTE(review): the pattern looks like a Spanish legal-entity tax ID (CIF)
# followed by a space - confirm against the data.
# Globals:   F_CONCESSIONS, F_LEGAL (read)
# Outputs:   writes $F_LEGAL
#######################################
getAllLegal() {
  echo "Extracting legal concessions..."
  csvgrep --no-header-row \
    --columns 10 \
    --regex '^[A-HJP-SUV]\d{7}[0-9A-J] ' \
    "$F_CONCESSIONS" > "$F_LEGAL"
}

#######################################
# Packs both CSV files into a gzip-compressed tar archive.
# Globals:   F_TAR_GZ, F_CONCESSIONS, F_LEGAL (read)
#######################################
compressData() {
  echo "Compressing data..."
  tar -czf "$F_TAR_GZ" "$F_CONCESSIONS" "$F_LEGAL"
}

#######################################
# Removes the downloaded page files and the cookie jar.
# Globals:   F_COOKIES (read)
#######################################
cleanTempFiles() {
  echo "Cleaning temporary files..."
  rm -f -- ./page_*.json "$F_COOKIES"
}

#######################################
# Reports an error, cleans up and terminates the script with status 1.
# Arguments: $1 - error message or failing exit status
#            $2 - line number where the error was detected
#######################################
errorHandler() {
  # Disarm the ERR trap so a failure during cleanup cannot re-enter here.
  trap - ERR
  echo "Error: ($1) occurred on line $2" >&2
  cleanTempFiles
  exit 1
}

#######################################
# Checks dependencies and runs the download / convert / pack pipeline.
# Globals:   APP, COOKIE (read)
#######################################
main() {
  local element
  for element in "${APP[@]}"; do
    # command -v honours PATH, so tools installed outside the standard
    # binary directories are found as well.
    if ! command -v "$element" > /dev/null 2>&1; then
      errorHandler "$element - Maybe it is not installed on the system. Sorry but I can't continue" "$LINENO"
    fi
  done

  if [[ -z "${COOKIE+x}" ]]; then
    echo "Manual cookie is unset"
    getCookies
  else
    echo "Manual cookie is set. Skipping get cookie step"
  fi

  getNumPages
  getAllConcessions
  convertJson2Csv
  getAllLegal
  compressData
  cleanTempFiles
}

trap 'errorHandler "$?" "$LINENO"' ERR
# 130 = 128 + SIGINT, the conventional status for an interrupted script.
trap 'printf "\nTerminated by Ctrl+c\n" >&2; cleanTempFiles; exit 130' INT

main "$@"