This commit is contained in:
parent
b50c796556
commit
c50ecac1de
16
.drone.yml
16
.drone.yml
|
@ -11,12 +11,22 @@ steps:
|
||||||
- apk update && apk add wget jq tar gzip bash util-linux
|
- apk update && apk add wget jq tar gzip bash util-linux
|
||||||
- pip3 install csvkit
|
- pip3 install csvkit
|
||||||
- cd scripts
|
- cd scripts
|
||||||
- chmod +x concessions.sh
|
- chmod +x ./*.sh
|
||||||
- ./concessions.sh
|
- ./concessions.sh
|
||||||
- name: Release concessions data
|
- name: Download calls
|
||||||
|
image: python:alpine
|
||||||
|
environment:
|
||||||
|
COOKIE:
|
||||||
|
from_secret: cookie
|
||||||
|
commands:
|
||||||
|
- apk update && apk add wget jq tar gzip bash util-linux
|
||||||
|
- cd scripts
|
||||||
|
- chmod +x ./*.sh
|
||||||
|
- ./calls.sh
|
||||||
|
- name: Release data
|
||||||
image: plugins/gitea-release
|
image: plugins/gitea-release
|
||||||
settings:
|
settings:
|
||||||
api_key:
|
api_key:
|
||||||
from_secret: api_key
|
from_secret: api_key
|
||||||
base_url: https://git.cuernodehipnos.es
|
base_url: https://git.cuernodehipnos.es
|
||||||
files: concessions.tar.gz
|
files: ./*.tar.gz
|
||||||
|
|
99
scripts/calls.sh
Executable file
99
scripts/calls.sh
Executable file
|
@ -0,0 +1,99 @@
|
||||||
|
#!/usr/bin/env bash
#
# Downloads the paginated "calls" listing (type=convs) from
# infosubvenciones.es as JSON, converts the rows to CSV and packs the CSV
# into a gzip-compressed tarball.
#
# Environment:
#   COOKIE  Optional. When set it is sent verbatim as the Cookie header and
#           the automatic cookie retrieval step is skipped.

# -e: abort on unhandled errors; -E: let functions inherit the ERR trap.
set -eE

# Number of rows requested per page.
ROWS=50000000
# Page count reported by the server; filled in by getNumPages.
PAGES=0
# First 13 digits of the nanosecond epoch (i.e. milliseconds); sent as the
# "nd" query parameter.
ND=$(date +%s%N | cut -b1-13)
USER_AGENT='Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0'
F_CALLS='calls.csv'
F_TAR_GZ='calls.tar.gz'
F_COOKIES='cookies.txt'
# External programs that must be installed before the script starts.
# csvgrep is deliberately not listed: nothing in this script calls it, and
# requiring it makes the start-up dependency check fail wherever csvkit is
# not installed.
APP=(wget jq tar gzip)
|
||||||
|
|
||||||
|
#######################################
# Requests the site's front page so that wget stores the session cookies.
# Globals:   F_COOKIES (read)  - cookie jar file to write
#            USER_AGENT (read) - value of the User-Agent header
# Arguments: none
# Outputs:   progress message on stdout
#######################################
function getCookies() {
  echo "Getting cookies..."

  # Only the cookie jar matters here: the page body (-O-) and wget's own
  # messages are discarded.
  wget -qO- \
    --ca-certificate=AC_Componentes_Informaticos_SHA256.crt \
    --keep-session-cookies \
    --save-cookies "$F_COOKIES" \
    --header "User-Agent: $USER_AGENT" \
    'https://www.infosubvenciones.es/' &> /dev/null
}
|
||||||
|
|
||||||
|
#######################################
# Downloads one page of the listing into page_NNN.json (NNN = zero-padded
# page number) in the current directory.
# Globals:   COOKIE (read, optional) - raw Cookie header value
#            F_COOKIES (read)        - cookie jar used when COOKIE is unset
#            USER_AGENT, ND, ROWS (read)
# Arguments: $1 - page number to download
# Outputs:   progress message on stdout; wget writes its own log
# Returns:   wget's exit status
#######################################
function fetchConcessions() {
  local pageNumber=$1
  local outputFile
  local -a authArgs

  # printf -v avoids a subshell and does not hide a formatting failure the
  # way "local var=$(...)" does.
  printf -v outputFile 'page_%03d.json' "$pageNumber"

  echo "Downloading page $pageNumber..."

  # The two authentication modes differ only in these arguments, so the
  # wget command itself is written once.
  if [[ -z "${COOKIE+x}" ]]; then
    authArgs=(--load-cookies "$F_COOKIES")
  else
    authArgs=(--header "Cookie: $COOKIE")
  fi

  wget \
    --ca-certificate=AC_Componentes_Informaticos_SHA256.crt \
    --keep-session-cookies \
    "${authArgs[@]}" \
    --header "User-Agent: $USER_AGENT" \
    -O "$outputFile" \
    "https://www.infosubvenciones.es/bdnstrans/busqueda?type=convs&_search=false&nd=${ND}&rows=${ROWS}&page=${pageNumber}&sidx=4&sord=asc"
}
|
||||||
|
export -f fetchConcessions
|
||||||
|
|
||||||
|
#######################################
# Downloads page 1 and reads the page and record totals from it.
# Globals:   PAGES (written) - value of the ".total" field of page 1
# Arguments: none
# Outputs:   progress and totals on stdout
# Calls errorHandler when the page count is not a positive integer.
#######################################
function getNumPages() {
  local page=1
  local outputFile
  local records

  printf -v outputFile 'page_%03d.json' "$page"

  echo "Getting number of pages..."

  fetchConcessions "$page"

  # -r prints strings without JSON quotes, so a quoted number still
  # compares as a number below.
  PAGES=$(jq -r '.total' "$outputFile")
  # Declared separately above so a jq failure is not masked by "local".
  records=$(jq -r '.records' "$outputFile")

  echo "Total pages: $PAGES"
  echo "Total records: $records"

  # jq prints "null" for a missing field; reject anything that is not a
  # positive decimal integer before it reaches an arithmetic context.
  if [[ ! "$PAGES" =~ ^[0-9]+$ ]] || ((10#$PAGES <= 0)); then
    errorHandler "Cannot get number of pages" "$LINENO"
  fi
}
|
||||||
|
|
||||||
|
#######################################
# Downloads pages 2..PAGES, up to five at a time.
# Globals:   PAGES (read)
#            ROWS, ND, USER_AGENT, F_COOKIES (exported here)
# Arguments: none
# Outputs:   progress message on stdout, plus the workers' output
# Returns:   xargs' exit status
#######################################
function getAllCalls() {
  echo "Downloading all calls..."

  # Each worker is a fresh "bash -c" process. It receives the exported
  # fetchConcessions function but only *exported* variables, so the
  # settings the function reads have to be exported as well; otherwise
  # the workers build their requests from empty values.
  export ROWS ND USER_AGENT F_COOKIES

  # Page 1 has already been fetched by the time PAGES is known.
  if ((PAGES < 2)); then
    return 0
  fi

  # The page number is handed over as a positional argument rather than
  # being substituted into the command string. -I already implies one
  # input line per invocation, so no -n is given.
  seq 2 "$PAGES" | xargs -P 5 -I {} bash -c 'fetchConcessions "$1"' _ {}
}
|
||||||
|
|
||||||
|
#######################################
# Flattens the ".rows" arrays of every JSON file in the current directory
# into a single CSV file.
# Globals:   F_CALLS (read) - CSV file to (over)write
# Arguments: none
# Outputs:   progress message on stdout; diagnostics on stderr
# Returns:   1 when there is no JSON file to convert, else jq's status
#######################################
function convertJson2Csv() {
  local -a jsonFiles

  echo "Converting JSON to CSV..."

  # "./" keeps a file name that starts with "-" from being read as an option.
  jsonFiles=(./*.json)

  # An unmatched glob stays literal; fail clearly instead of handing jq a
  # file called "*.json".
  if [[ ! -e "${jsonFiles[0]}" ]]; then
    echo "No JSON files found to convert" >&2
    return 1
  fi

  # jq reads several input files in the order given, so one invocation
  # replaces the per-file loop.
  jq -r '.rows[] | [.[]] | @csv' "${jsonFiles[@]}" > "$F_CALLS"
}
|
||||||
|
|
||||||
|
#######################################
# Packs the CSV file into a gzip-compressed tar archive.
# Globals:   F_TAR_GZ (read) - archive to create
#            F_CALLS (read)  - file to add to the archive
# Arguments: none
# Outputs:   progress message on stdout
# Returns:   tar's exit status
#######################################
function compressData() {
  echo "Compressing data..."

  # Quoted so each name stays a single argument; "--" ends option parsing
  # before the member name.
  tar -czf "$F_TAR_GZ" -- "$F_CALLS"
}
|
||||||
|
|
||||||
|
#######################################
# Deletes every *.json file in the current directory and the cookie jar.
# Globals:   F_COOKIES (read) - cookie jar file to delete
# Arguments: none
# Outputs:   progress message on stdout
#######################################
function cleanTempFiles() {
  echo "Cleaning temporary files..."

  # -f: files that do not exist are not an error.
  rm -f -- ./*.json "$F_COOKIES"
}
|
||||||
|
|
||||||
|
#######################################
# Reports an error, removes temporary files and terminates the script.
# Arguments: $1 - error description (the ERR trap passes the exit status)
#            $2 - line number where the error was detected
# Outputs:   the error message on stderr
# Exits with status 1; never returns.
#######################################
function errorHandler() {
  # Disarm the ERR trap first so a failure during cleanup cannot call this
  # handler again.
  trap - ERR

  echo "Error: ($1) occurred on line $2" >&2
  cleanTempFiles
  exit 1
}
|
||||||
|
|
||||||
|
# Any unhandled failure is reported with its exit status and line number.
trap 'errorHandler "$?" "$LINENO"' ERR
# 130 = 128 + SIGINT, so an interrupted run is never reported as a success.
trap "echo -e '\nTerminated by Ctrl+c'; cleanTempFiles; exit 130" INT

# Refuse to start when a required program is missing. "command -v" is a
# shell builtin, so the check itself needs no extra package.
for element in "${APP[@]}"; do
  if ! command -v "$element" > /dev/null 2>&1; then
    errorHandler "$element - Maybe it is not installed on the system. Sorry but I can't continue" "$LINENO"
  fi
done

# A cookie supplied through the environment replaces the automatic
# cookie retrieval.
if [[ -z "${COOKIE+x}" ]]; then
  echo "Manual cookie is unset"
  getCookies
else
  echo "Manual cookie is set. Skipping get cookie step"
fi

getNumPages
getAllCalls
convertJson2Csv
compressData
cleanTempFiles
|
|
@ -79,7 +79,7 @@ function compressData() {
|
||||||
function cleanTempFiles() {
|
function cleanTempFiles() {
|
||||||
echo "Cleaning temporary files..."
|
echo "Cleaning temporary files..."
|
||||||
|
|
||||||
rm -f *.json $F_COOKIES
|
rm -f ./*.json $F_COOKIES
|
||||||
}
|
}
|
||||||
|
|
||||||
function errorHandler() {
|
function errorHandler() {
|
||||||
|
@ -104,7 +104,7 @@ fi
|
||||||
|
|
||||||
getNumPages
|
getNumPages
|
||||||
getAllConcessions
|
getAllConcessions
|
||||||
convertJSON2CSV
|
convertJson2Csv
|
||||||
getAllLegal
|
getAllLegal
|
||||||
compressData
|
compressData
|
||||||
cleanTempFiles
|
cleanTempFiles
|
||||||
|
|
Loading…
Reference in New Issue
Block a user