From c50ecac1de1ffdee28834e958a46b6d05ba719e5 Mon Sep 17 00:00:00 2001
From: Amab
Date: Sat, 21 Jan 2023 14:07:21 +0100
Subject: [PATCH] Add calls download script

---
 .drone.yml             |  16 +++++--
 scripts/calls.sh       | 139 +++++++++++++++++++++++++++++++++++++++++
 scripts/concessions.sh |   4 +-
 3 files changed, 154 insertions(+), 5 deletions(-)
 create mode 100755 scripts/calls.sh

diff --git a/.drone.yml b/.drone.yml
index 0031754..9303cb4 100644
--- a/.drone.yml
+++ b/.drone.yml
@@ -11,12 +11,22 @@ steps:
       - apk update && apk add wget jq tar gzip bash util-linux
       - pip3 install csvkit
       - cd scripts
-      - chmod +x concessions.sh
+      - chmod +x ./*.sh
       - ./concessions.sh
-  - name: Release concessions data
+  - name: Download calls
+    image: python:alpine
+    environment:
+      COOKIE:
+        from_secret: cookie
+    commands:
+      - apk update && apk add wget jq tar gzip bash util-linux
+      - cd scripts
+      - chmod +x ./*.sh
+      - ./calls.sh
+  - name: Release data
     image: plugins/gitea-release
     settings:
       api_key:
         from_secret: api_key
       base_url: https://git.cuernodehipnos.es
-      files: concessions.tar.gz
+      files: ./*.tar.gz
diff --git a/scripts/calls.sh b/scripts/calls.sh
new file mode 100755
index 0000000..108e5df
--- /dev/null
+++ b/scripts/calls.sh
@@ -0,0 +1,139 @@
+#!/usr/bin/env bash
+#
+# Downloads the paginated "convs" search results from infosubvenciones.es,
+# flattens every row of every page into calls.csv and packs that file as
+# calls.tar.gz.
+#
+# Environment:
+#   COOKIE  Optional. When set it is sent verbatim as the Cookie header and
+#           the cookie bootstrap request is skipped.
+set -eE
+
+ROWS=50000000
+PAGES=0
+# First 13 characters of epoch seconds followed by nanoseconds; sent as "nd".
+ND=$(date +%s%N | cut -b1-13)
+USER_AGENT='Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0'
+F_CALLS='calls.csv'
+F_TAR_GZ='calls.tar.gz'
+F_COOKIES='cookies.txt'
+# Only the tools this script really invokes (gzip is used by "tar -z").
+APP=(wget jq tar gzip)
+
+# getAllCalls runs fetchConcessions in child shells started by xargs; those
+# shells only see exported variables.
+export ROWS ND USER_AGENT F_COOKIES
+
+# Requests the landing page so that wget stores the session cookies in
+# $F_COOKIES; the response body and wget's own output are discarded.
+function getCookies() {
+    echo "Getting cookies..."
+
+    wget -qO- --ca-certificate=AC_Componentes_Informaticos_SHA256.crt --keep-session-cookies --save-cookies "$F_COOKIES" --header "User-Agent: $USER_AGENT" 'https://www.infosubvenciones.es/' &> /dev/null
+}
+
+# Downloads one page of results into page_NNN.json.
+# Arguments: $1 - page number to request
+# Sends $COOKIE as the Cookie header when it is set, otherwise loads the
+# cookies saved in $F_COOKIES.
+function fetchConcessions() {
+    local outputFile
+    local -a session
+
+    outputFile="page_$(printf %03d "$1").json"
+    if [ -z "${COOKIE+x}" ]; then
+        session=(--load-cookies "$F_COOKIES")
+    else
+        session=(--header "Cookie: $COOKIE")
+    fi
+
+    echo "Downloading page $1..."
+
+    wget --ca-certificate=AC_Componentes_Informaticos_SHA256.crt --keep-session-cookies "${session[@]}" --header "User-Agent: $USER_AGENT" -O "$outputFile" "https://www.infosubvenciones.es/bdnstrans/busqueda?type=convs&_search=false&nd=$ND&rows=$ROWS&page=$1&sidx=4&sord=asc"
+}
+export -f fetchConcessions
+
+# Fetches page 1 and reads the page count (.total) and record count
+# (.records) from it. Sets the global PAGES; aborts through errorHandler
+# unless .total is a positive integer.
+function getNumPages() {
+    local page=1
+    local outputFile
+    local records
+
+    outputFile="page_$(printf %03d "$page").json"
+
+    echo "Getting number of pages..."
+
+    fetchConcessions "$page"
+
+    # Assigned separately from "local" so a jq failure is not masked.
+    PAGES=$(jq '.total' "$outputFile")
+    records=$(jq '.records' "$outputFile")
+
+    echo "Total pages: $PAGES"
+    echo "Total records: $records"
+
+    # Also rejects "null" and any other non-numeric output from jq.
+    if ! [[ $PAGES =~ ^[1-9][0-9]*$ ]]; then
+        errorHandler "Cannot get number of pages" "$LINENO"
+    fi
+}
+
+# Downloads pages 2..PAGES, up to five at a time (page 1 is already on disk).
+function getAllCalls() {
+    echo "Downloading all calls..."
+
+    seq 2 "$PAGES" | xargs -I page -n 1 -P 5 bash -c "fetchConcessions page"
+}
+
+# Writes the values of every .rows[] entry of every page as one CSV line.
+function convertJson2Csv() {
+    echo "Converting JSON to CSV..."
+
+    for file in ./*.json; do jq -r '.rows[] | [.[]] | @csv' "$file"; done > "$F_CALLS"
+}
+
+# Packs the CSV into a gzip-compressed tarball.
+function compressData() {
+    echo "Compressing data..."
+
+    tar -czf "$F_TAR_GZ" "$F_CALLS"
+}
+
+# Removes the downloaded pages and the cookie jar.
+function cleanTempFiles() {
+    echo "Cleaning temporary files..."
+
+    rm -f ./*.json "$F_COOKIES"
+}
+
+# Reports a failure on stderr, cleans up and exits with status 1.
+# Arguments: $1 - exit status or message, $2 - line number
+function errorHandler() {
+    echo "Error: ($1) occurred on line $2" >&2
+    cleanTempFiles
+    exit 1
+}
+
+trap 'errorHandler "$?" "$LINENO"' ERR
+trap "echo -e '\nTerminated by Ctrl+c'; cleanTempFiles; exit 130" INT
+
+for element in "${APP[@]}"; do
+    if ! command -v "$element" > /dev/null; then
+        errorHandler "$element - Maybe it is not installed on the system. Sorry but I can't continue" "$LINENO"
+    fi
+done
+
+if [ -z "${COOKIE+x}" ]; then
+    echo "Manual cookie is unset"
+    getCookies
+else
+    echo "Manual cookie is set. Skipping get cookie step"
+fi
+
+getNumPages
+getAllCalls
+convertJson2Csv
+compressData
+cleanTempFiles
diff --git a/scripts/concessions.sh b/scripts/concessions.sh
index bafeb48..fc8978f 100755
--- a/scripts/concessions.sh
+++ b/scripts/concessions.sh
@@ -79,7 +79,7 @@ function compressData() {
 function cleanTempFiles() {
     echo "Cleaning temporary files..."
 
-    rm -f *.json $F_COOKIES
+    rm -f ./*.json $F_COOKIES
 }
 
 function errorHandler() {
@@ -104,7 +104,7 @@ fi
 
 getNumPages
 getAllConcessions
-convertJSON2CSV
+convertJson2Csv
 getAllLegal
 compressData
 cleanTempFiles