Add calls download script
continuous-integration/drone/push Build is failing Details

This commit is contained in:
Amab 2023-01-21 14:07:21 +01:00
parent b50c796556
commit c50ecac1de
3 changed files with 114 additions and 5 deletions

View File

@ -11,12 +11,22 @@ steps:
- apk update && apk add wget jq tar gzip bash util-linux
- pip3 install csvkit
- cd scripts
- chmod +x concessions.sh
- chmod +x ./*.sh
- ./concessions.sh
- name: Release concessions data
- name: Download calls
image: python:alpine
environment:
COOKIE:
from_secret: cookie
commands:
- apk update && apk add wget jq tar gzip bash util-linux
- cd scripts
- chmod +x ./*.sh
- ./calls.sh
- name: Release data
image: plugins/gitea-release
settings:
api_key:
from_secret: api_key
base_url: https://git.cuernodehipnos.es
files: concessions.tar.gz
files: ./*.tar.gz

99
scripts/calls.sh Executable file
View File

@ -0,0 +1,99 @@
#!/usr/bin/env bash
#
# Downloads every result page of the "convs" search from
# www.infosubvenciones.es as JSON, flattens the rows into one CSV file and
# packs that CSV into a tar.gz archive.
#
# Environment:
#   COOKIE  Optional. When set, it is sent verbatim as the Cookie header and
#           the automatic cookie-fetching step is skipped.

# -e: abort on unhandled errors; -E: functions inherit the ERR trap.
set -eE

# Value of the "rows" query parameter (requested page size).
ROWS=50000000
# Number of result pages; filled in by getNumPages.
PAGES=0
# First 13 digits of the nanosecond epoch (i.e. milliseconds); sent as the
# "nd" query parameter.
ND=$(date +%s%N | cut -b1-13)
USER_AGENT='Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0'
# Output CSV, final archive and cookie jar file names.
F_CALLS='calls.csv'
F_TAR_GZ='calls.tar.gz'
F_COOKIES='cookies.txt'
# External commands that must be available before the download starts.
# csvgrep is deliberately not listed: nothing in this script calls it, and
# requiring it made the run abort on systems without csvkit installed.
APP=(wget jq tar gzip)
#######################################
# Requests the site's landing page and saves the cookies it sets, including
# session cookies, into the cookie jar file.
# Globals:   F_COOKIES (read), USER_AGENT (read)
# Arguments: none
# Outputs:   a progress line on stdout; the page body and all wget output
#            are discarded
# Returns:   wget's exit status
#######################################
function getCookies() {
  echo "Getting cookies..."
  # The cookie jar path is quoted so a name containing spaces or glob
  # characters is passed to wget as a single argument.
  wget -qO- \
    --ca-certificate=AC_Componentes_Informaticos_SHA256.crt \
    --keep-session-cookies \
    --save-cookies "$F_COOKIES" \
    --header "User-Agent: $USER_AGENT" \
    'https://www.infosubvenciones.es/' &> /dev/null
}
#######################################
# Downloads one result page of the "convs" search into page_NNN.json
# (NNN = zero-padded page number).
# Globals:   COOKIE (read, optional), F_COOKIES, USER_AGENT, ND, ROWS (read)
# Arguments: $1 - page number to request
# Outputs:   a progress line on stdout; the JSON response goes to the file
# Returns:   wget's exit status
#######################################
function fetchConcessions() {
  local outputFile url
  local -a cookieArgs
  # printf -v avoids a subshell and does not hide a formatting failure the
  # way 'local var="$(...)"' does.
  printf -v outputFile 'page_%03d.json' "$1"
  url="https://www.infosubvenciones.es/bdnstrans/busqueda?type=convs&_search=false&nd=$ND&rows=$ROWS&page=$1&sidx=4&sord=asc"
  echo "Downloading page $1..."
  # COOKIE unset -> use the cookie jar; COOKIE set (even if empty) -> send
  # it as a literal Cookie header.
  if [ -z "${COOKIE+x}" ]; then
    cookieArgs=(--load-cookies "$F_COOKIES")
  else
    cookieArgs=(--header "Cookie: $COOKIE")
  fi
  wget \
    --ca-certificate=AC_Componentes_Informaticos_SHA256.crt \
    --keep-session-cookies \
    "${cookieArgs[@]}" \
    --header "User-Agent: $USER_AGENT" \
    -O "$outputFile" \
    "$url"
}
# Exported so that child bash processes can call the function.
export -f fetchConcessions
#######################################
# Downloads page 1 and reads the total page count and record count from it.
# Globals:   PAGES (written)
# Arguments: none
# Outputs:   progress and totals on stdout
# Returns:   0 on success; calls errorHandler (which exits) when the page
#            count is not a positive integer
#######################################
function getNumPages() {
  local page=1
  local outputFile records
  printf -v outputFile 'page_%03d.json' "$page"
  echo "Getting number of pages..."
  fetchConcessions "$page"
  PAGES=$(jq '.total' "$outputFile")
  # Assigned separately from 'local' so a jq failure is not masked by the
  # exit status of the 'local' builtin.
  records=$(jq '.records' "$outputFile")
  echo "Total pages: $PAGES"
  echo "Total records: $records"
  # Reject anything that is not a positive integer (e.g. "null", "", "1.5");
  # a bare '-le' test raises an arithmetic error on such input and silently
  # lets the script continue.
  if [[ ! $PAGES =~ ^[0-9]+$ ]] || (( 10#$PAGES <= 0 )); then
    errorHandler "Cannot get number of pages" "$LINENO"
  fi
}
#######################################
# Downloads result pages 2..PAGES, up to five at a time, by running
# fetchConcessions in child bash processes.
# Globals:   PAGES (read); ROWS, ND, USER_AGENT, F_COOKIES (exported)
# Arguments: none
# Outputs:   progress on stdout
# Returns:   exit status of the xargs pipeline
#######################################
function getAllCalls() {
  echo "Downloading all calls..."
  # The workers are separate bash processes: they only see exported
  # variables, so the settings fetchConcessions reads must be exported or
  # every request would go out with empty nd/rows/User-Agent values.
  export ROWS ND USER_AGENT F_COOKIES
  # -I already runs one command per input line, so -n 1 is dropped. The page
  # number is passed as a positional argument rather than spliced into the
  # command string.
  seq 2 "$PAGES" | xargs -I{} -P 5 bash -c 'fetchConcessions "$1"' _ {}
}
#######################################
# Flattens the .rows array of every JSON file in the current directory into
# CSV lines and writes them all to the output CSV file.
# Globals:   F_CALLS (read)
# Arguments: none
# Outputs:   a progress line on stdout; CSV rows go to $F_CALLS
# Returns:   non-zero if the redirect or the last jq call fails
#######################################
function convertJson2Csv() {
  local file
  echo "Converting JSON to CSV..."
  # './' keeps a file name starting with '-' from being read as a jq option
  # (same glob form as cleanTempFiles); the redirect target is quoted so a
  # name with spaces does not produce an "ambiguous redirect".
  for file in ./*.json; do
    jq -r '.rows[] | [.[]] | @csv' "$file"
  done > "$F_CALLS"
}
#######################################
# Packs the CSV file into a gzip-compressed tar archive.
# Globals:   F_TAR_GZ (read), F_CALLS (read)
# Arguments: none
# Outputs:   a progress line on stdout
# Returns:   tar's exit status
#######################################
function compressData() {
  echo "Compressing data..."
  # Both names are quoted so each reaches tar as exactly one argument.
  tar -czf "$F_TAR_GZ" "$F_CALLS"
}
#######################################
# Deletes the downloaded JSON pages and the cookie jar from the current
# directory. Missing files are not an error (rm -f).
# Globals:   F_COOKIES (read)
# Arguments: none
# Outputs:   a progress line on stdout
# Returns:   rm's exit status
#######################################
function cleanTempFiles() {
  echo "Cleaning temporary files..."
  # The cookie jar name is quoted so it is removed as one path instead of
  # being word-split or glob-expanded.
  rm -f -- ./*.json "$F_COOKIES"
}
#######################################
# Reports a failure, removes temporary files and terminates the script.
# Arguments: $1 - error description or exit status
#            $2 - line number where the failure happened
# Outputs:   one "Error: ..." line on stdout
# Returns:   never returns; exits with status 1
#######################################
function errorHandler() {
  local reason=$1 line=$2
  printf 'Error: (%s) occurred on line %s\n' "$reason" "$line"
  cleanTempFiles
  exit 1
}
# Any unhandled command failure: report its exit status and line number,
# clean up and exit (see errorHandler).
trap 'errorHandler "$?" "$LINENO"' ERR
# Ctrl+C: clean up and exit with 130 (128 + SIGINT) so an interrupted run is
# not mistaken for a successful one.
trap "echo -e '\nTerminated by Ctrl+c'; cleanTempFiles; exit 130" INT

# Abort early when a required external command is missing. 'command -v' is
# a shell builtin, so the check itself needs no extra tool and does not
# depend on the output format of 'whereis'.
for element in "${APP[@]}"; do
  if ! command -v "$element" > /dev/null 2>&1; then
    errorHandler "$element - Maybe it is not installed on the system. Sorry but I can't continue" "$LINENO"
  fi
done

# COOKIE unset -> obtain session cookies from the site; COOKIE set -> the
# caller-supplied value is used by fetchConcessions instead.
if [ -z "${COOKIE+x}" ]; then
  echo "Manual cookie is unset"
  getCookies
else
  echo "Manual cookie is set. Skipping get cookie step"
fi

# Pipeline: page count -> remaining pages -> CSV -> archive -> cleanup.
getNumPages
getAllCalls
convertJson2Csv
compressData
cleanTempFiles

View File

@ -79,7 +79,7 @@ function compressData() {
function cleanTempFiles() {
echo "Cleaning temporary files..."
rm -f *.json $F_COOKIES
rm -f ./*.json $F_COOKIES
}
function errorHandler() {
@ -104,7 +104,7 @@ fi
getNumPages
getAllConcessions
convertJSON2CSV
convertJson2Csv
getAllLegal
compressData
cleanTempFiles