Compare commits

...

25 Commits

Author SHA1 Message Date
Amab bcec4f36d7 Re-enable all steps on pipeline
continuous-integration/drone/push Build is failing Details
2023-01-22 18:25:07 +01:00
Amab 9bf87100ba Remove Release data step dependencies temporarily
continuous-integration/drone/push Build encountered an error Details
2023-01-22 11:01:02 +01:00
Amab 51f3425150 Disable Download concessions step temporarily
continuous-integration/drone/push Build encountered an error Details
2023-01-22 09:06:17 +01:00
Amab c236cdb420 Fix code style
continuous-integration/drone/push Build was killed Details
2023-01-21 23:29:20 +01:00
Amab 2abd83dcd7 Remove blank line
continuous-integration/drone/push Build was killed Details
2023-01-21 20:57:13 +01:00
Amab fbc9bbc3b4 Revert "Disable Download concessions step temporarily"
continuous-integration/drone/push Build encountered an error Details
This reverts commit b7f53cfd92.
2023-01-21 19:18:20 +01:00
Amab b7f53cfd92 Disable Download concessions step temporarily
continuous-integration/drone/push Build encountered an error Details
2023-01-21 16:37:52 +01:00
Amab c90d00b005 Revert "Disable Download concessions step temporarily"
continuous-integration/drone/push Build was killed Details
This reverts commit 3094902af1.
2023-01-21 16:32:43 +01:00
Amab ce76d4c0cb Revert "Disable Download concessions step temporarily"
This reverts commit f341b4eaa4.
2023-01-21 16:32:40 +01:00
Amab f341b4eaa4 Disable Download concessions step temporarily
continuous-integration/drone/push Build was killed Details
2023-01-21 16:29:05 +01:00
Amab 3094902af1 Disable Download concessions step temporarily
continuous-integration/drone/push Build was killed Details
2023-01-21 16:26:37 +01:00
Amab 3c10992282 Run wget on quiet mode
continuous-integration/drone/push Build was killed Details
2023-01-21 16:22:23 +01:00
Amab c9f0af7dce Remove unnecessary dependency check from calls script
continuous-integration/drone/push Build encountered an error Details
2023-01-21 14:15:54 +01:00
Amab 8b53295dbb Parallelize pipeline
continuous-integration/drone/push Build was killed Details
2023-01-21 14:13:10 +01:00
Amab c50ecac1de Add calls download script
continuous-integration/drone/push Build is failing Details
2023-01-21 14:07:21 +01:00
Amab b50c796556 Fix pipeline environment variables
continuous-integration/drone/push Build is failing Details
2023-01-21 13:14:02 +01:00
Amab dec20b2659 Fix pipeline environment variables
continuous-integration/drone/push Build is failing Details
2023-01-21 13:10:13 +01:00
Amab 9b6c7b7d36 Fix cookie environment variable name in pipeline config
continuous-integration/drone/push Build is failing Details
2023-01-21 13:04:19 +01:00
Amab ac9eeb026a Fix pipeline environment variables
continuous-integration/drone/push Build is failing Details
2023-01-21 13:00:19 +01:00
Amab 68c5e4acb2 Add util-linux package to pipeline dependencies
continuous-integration/drone/push Build is failing Details
2023-01-21 12:51:11 +01:00
Amab 1934626a34 Remove root user check
continuous-integration/drone/push Build is failing Details
2023-01-21 12:48:11 +01:00
Amab 37d9dc78dd Add bash package to pipeline dependencies
continuous-integration/drone/push Build is failing Details
2023-01-21 12:45:15 +01:00
Amab d90c5c21b1 Remove Drone CI trigger
continuous-integration/drone/push Build is failing Details
2023-01-21 12:38:30 +01:00
Amab 3cc666e4d8 Rename Drone CI config file 2023-01-21 12:32:26 +01:00
Amab a38da53f8a Add concessions download script 2023-01-21 12:27:26 +01:00
5 changed files with 375 additions and 1 deletions

35
.drone.yml Normal file
View File

@ -0,0 +1,35 @@
# Drone CI pipeline: runs the two download scripts and publishes the
# resulting archives as a Gitea release.
kind: pipeline
name: default
steps:
# Runs scripts/concessions.sh; csvkit is installed with pip because that
# script calls csvgrep.
- name: Download concessions
image: python:alpine
environment:
# Exposed to the script as $COOKIE; when set, the script sends it as the
# Cookie header instead of fetching session cookies itself.
COOKIE:
from_secret: cookie
commands:
- apk update && apk add wget jq tar gzip bash util-linux
- pip3 install csvkit
- cd scripts
- chmod +x ./*.sh
- ./concessions.sh
# Runs scripts/calls.sh with the same system packages (no csvkit needed).
- name: Download calls
image: python:alpine
environment:
COOKIE:
from_secret: cookie
commands:
- apk update && apk add wget jq tar gzip bash util-linux
- cd scripts
- chmod +x ./*.sh
- ./calls.sh
# Uploads the generated archives once both download steps have finished.
- name: Release data
image: plugins/gitea-release
settings:
api_key:
from_secret: api_key
base_url: https://git.cuernodehipnos.es
# NOTE(review): both download steps `cd scripts` before running, so the
# .tar.gz files are presumably written under scripts/ — confirm that this
# glob (relative to the workspace root) actually matches them.
files: ./*.tar.gz
depends_on:
- Download concessions
- Download calls

93
.gitignore vendored
View File

@ -1,2 +1,93 @@
.DS_Store
# Created by https://www.toptal.com/developers/gitignore/api/intellij+all
# Edit at https://www.toptal.com/developers/gitignore?templates=intellij+all
### Intellij+all ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# AWS User-specific
.idea/**/aws.xml
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# SonarLint plugin
.idea/sonarlint/
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
### Intellij+all Patch ###
# Ignore everything but code style settings and run configurations
# that are supposed to be shared within teams.
.idea/*
!.idea/codeStyles
!.idea/runConfigurations
# End of https://www.toptal.com/developers/gitignore/api/intellij+all
/.idea/codeStyles/codeStyleConfig.xml

View File

@ -0,0 +1,39 @@
-----BEGIN CERTIFICATE-----
MIIG1jCCBL6gAwIBAgIQNMarBE42mRJRyCULbJTWwDANBgkqhkiG9w0BAQsFADA7
MQswCQYDVQQGEwJFUzERMA8GA1UECgwIRk5NVC1SQ00xGTAXBgNVBAsMEEFDIFJB
SVogRk5NVC1SQ00wHhcNMTMwNjI0MTA1MjU5WhcNMjgwNjI0MTA1MjU5WjBHMQsw
CQYDVQQGEwJFUzERMA8GA1UECgwIRk5NVC1SQ00xJTAjBgNVBAsMHEFDIENvbXBv
bmVudGVzIEluZm9ybcOhdGljb3MwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK
AoIBAQCXVx8rdbF7/xY44CaSqzzGo5BhvzA8knxC/3KJYVzTf+CkOvMxMUDub8b0
h38MDujm/RKZhBNOWbKhxF3U61ZVhcR9xOCciuS/soT80m3BByxAKcZsNka0jCA4
XRkglDaAFxCHEZ06MOnvXsSOZDfPYahbQ3VFCVycJuhlHdAwSpmceQwcRYkR6YgX
wTiyzCNGivMKAmRS3dItqDOmDW/nxiDFq/Jd8VWY7GFkwbbAeqYId8FjN8zfvafu
nsB9SLFkUjPPMeqfmC7Bdh7HMxLpaOXROwH201cmlebiPkn0xSFxXFqwhhr6yN8U
QYZ3O/+xdHLrS6DS9+CJUF6d09ijAgMBAAGjggLIMIICxDASBgNVHRMBAf8ECDAG
AQH/AgEAMA4GA1UdDwEB/wQEAwIBBjAdBgNVHQ4EFgQUGfhYLxTWpsybBJgIDUzX
qwCng2UwgZgGCCsGAQUFBwEBBIGLMIGIMEkGCCsGAQUFBzABhj1odHRwOi8vb2Nz
cGZubXRyY21jYS5jZXJ0LmZubXQuZXMvb2NzcGZubXRyY21jYS9PY3NwUmVzcG9u
ZGVyMDsGCCsGAQUFBzAChi9odHRwOi8vd3d3LmNlcnQuZm5tdC5lcy9jZXJ0cy9B
Q1JBSVpGTk1UUkNNLmNydDAfBgNVHSMEGDAWgBT3fcX9xOiaG3dkp/UdoMy/h2Ca
bTCB6wYDVR0gBIHjMIHgMIHdBgRVHSAAMIHUMCkGCCsGAQUFBwIBFh1odHRwOi8v
d3d3LmNlcnQuZm5tdC5lcy9kcGNzLzCBpgYIKwYBBQUHAgIwgZkMgZZTdWpldG8g
YSBsYXMgY29uZGljaW9uZXMgZGUgdXNvIGV4cHVlc3RhcyBlbiBsYSBEZWNsYXJh
Y2nDs24gZGUgUHLDoWN0aWNhcyBkZSBDZXJ0aWZpY2FjacOzbiBkZSBsYSBGTk1U
LVJDTSAoIEMvIEpvcmdlIEp1YW4sIDEwNi0yODAwOS1NYWRyaWQtRXNwYcOxYSkw
gdQGA1UdHwSBzDCByTCBxqCBw6CBwIaBkGxkYXA6Ly9sZGFwZm5tdC5jZXJ0LmZu
bXQuZXMvQ049Q1JMLE9VPUFDJTIwUkFJWiUyMEZOTVQtUkNNLE89Rk5NVC1SQ00s
Qz1FUz9hdXRob3JpdHlSZXZvY2F0aW9uTGlzdDtiaW5hcnk/YmFzZT9vYmplY3Rj
bGFzcz1jUkxEaXN0cmlidXRpb25Qb2ludIYraHR0cDovL3d3dy5jZXJ0LmZubXQu
ZXMvY3Jscy9BUkxGTk1UUkNNLmNybDANBgkqhkiG9w0BAQsFAAOCAgEAo2bsQ2xL
Dcyodieqjd+uy/lfxDw/MbrAq/ZaNFkIlcypUYamOM4vrm5rz8oLjPCoLkJ48P+n
P08Gkcl5Q6q6VFcZLia+U3gfHXrkyqToQlrtViGCGH3xA4u56XtMHGXSdk9vQ0yD
nW5f7bUEkp+uvcKewrOvNcpbIAgD4eU7gdOS0w7BagcFRBgTKBw2s3z73fRZtouJ
g/atmWYtXbBsfNjph+pCh+h5sbSyZUVzO5AemyjpYYYNMWDQrTXq+7O8zIPuPaNE
SjEexuzn+VjHG90RlUK1LygARi+Ir0opD2w6erb/hK8Eea7MFdKQ2ASqNBGJggNo
5vfPVvjHiL+Antmh7mQSKL+4YwFU64d4KK9k0C1mbJethDQFKcjTK1vMvnXFiups
IuyTqwKauo7u2zMKzY4r3VYOW9TpMyLPFIY8pII5GyNzXlL0F4nscOvduTEPEYqx
eNJfpDDPY/DO8WfxgdRTy2W3D/UoAulb+Y+nuzGGCtFQrsSMQX487R+aY0nWot/h
ajef6BcPuxhDfQrg5IafrISVmcJAplb3tXhh0sz7RbYz6jf1bke4eU5fnrTMtGlV
teUL2vjrfUPHW07kBJuaQ7sxORNV3bpHisOnHj+AriQzCn5vINpSHW6hTm7IfRkb
ltu/aQrsMuUhP7HE/v+uXe5CuboV5ubZhHU=
-----END CERTIFICATE-----

99
scripts/calls.sh Executable file
View File

@ -0,0 +1,99 @@
#!/usr/bin/env bash
# Downloads the "convs" (calls) listing from www.infosubvenciones.es page by
# page as JSON, converts the rows to CSV and packs the CSV into a .tar.gz.
# If the COOKIE environment variable is set it is sent as the Cookie header;
# otherwise session cookies are fetched first and stored in $F_COOKIES.

# -e: abort on unhandled errors; -E: functions inherit the ERR trap.
set -eE
# Value sent as the `rows` query parameter of every request.
ROWS=50000000
# Total number of pages; overwritten by getNumPages from the first response.
PAGES=0
# First 13 characters of the epoch time in nanoseconds (i.e. milliseconds);
# sent as the `nd` query parameter.
ND=$(date +%s%N | cut -b1-13)
# User-Agent header sent with every request.
USER_AGENT='Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0'
# Output CSV, output archive and cookie jar file names (relative to the cwd).
F_CALLS='calls.csv'
F_TAR_GZ='calls.tar.gz'
F_COOKIES='cookies.txt'
# Commands that must be available; checked before any download starts.
APP=(wget jq tar gzip)
# Requests the site's landing page once so that wget stores the session
# cookies in $F_COOKIES (later read back with --load-cookies).
# Globals: F_COOKIES (read), USER_AGENT (read)
# Outputs: a progress line on stdout; wget's own output is discarded.
function getCookies() {
  echo "Getting cookies..."
  # -O /dev/null: only the cookies matter. Without it wget saves the page as
  # index.html (index.html.1, ...) in the working directory on every run.
  wget -q \
    --ca-certificate=AC_Componentes_Informaticos_SHA256.crt \
    --keep-session-cookies \
    --save-cookies "$F_COOKIES" \
    --header "User-Agent: $USER_AGENT" \
    -O /dev/null \
    'https://www.infosubvenciones.es/' &> /dev/null
}
# Downloads one page of the calls listing into page_NNN.json (NNN is the
# zero-padded page number).
# Arguments: $1 - page number to request
#            $2 - total number of pages (used only in the progress message)
# Globals:   COOKIE (read, optional), F_COOKIES, USER_AGENT, ND, ROWS (read)
# Outputs:   a progress line on stdout; the response body in page_NNN.json
function fetchCalls() {
  local outputFile
  local -a auth
  # Assigned separately from `local` so a printf failure is not masked.
  outputFile=$(printf 'page_%03d.json' "$1")
  echo "Downloading page $1 of $2..."
  # ${COOKIE+x} is empty only when COOKIE is unset: use the cookie jar then,
  # otherwise send the manually supplied cookie as a header.
  if [[ -z "${COOKIE+x}" ]]; then
    auth=(--load-cookies "$F_COOKIES")
  else
    auth=(--header "Cookie: $COOKIE")
  fi
  wget -q \
    --ca-certificate=AC_Componentes_Informaticos_SHA256.crt \
    --keep-session-cookies \
    "${auth[@]}" \
    --header "User-Agent: $USER_AGENT" \
    -O "$outputFile" \
    "https://www.infosubvenciones.es/bdnstrans/busqueda?type=convs&_search=false&nd=$ND&rows=$ROWS&page=$1&sidx=4&sord=asc"
}
# Exported so the `bash -c` workers started by xargs can call it.
export -f fetchCalls
# Downloads page 1 and reads the total page count and record count from it.
# Globals: PAGES (written with the `.total` field of the first response)
# Outputs: progress and totals on stdout
# Calls errorHandler when `.total` is not a positive integer.
function getNumPages() {
  local page=1
  local outputFile records
  # Assigned separately from `local` so a failing printf/jq is not masked by
  # the always-zero exit status of `local`.
  outputFile=$(printf 'page_%03d.json' "$page")
  echo "Getting number of pages..."
  fetchCalls "$page" "$PAGES"
  PAGES=$(jq '.total' "$outputFile")
  records=$(jq '.records' "$outputFile")
  echo "Total pages: $PAGES"
  echo "Total records: $records"
  # Reject 0, negatives, "null", empty and non-integer values alike; a plain
  # `-le 0` test lets values such as 1.5 slip through.
  if [[ ! "$PAGES" =~ ^[1-9][0-9]*$ ]]; then
    errorHandler "Cannot get number of pages" "$LINENO"
  fi
}
# Downloads pages 2..PAGES, up to five at a time (page 1 is fetched earlier
# by getNumPages).
# Globals: PAGES (read); ROWS, ND, USER_AGENT, F_COOKIES (exported)
# Outputs: progress lines on stdout
function getAllCalls() {
  echo "Downloading all calls..."
  # Each worker is a separate `bash -c` process: it sees the exported
  # fetchCalls function but only *exported* variables. Without this export
  # the workers would request pages with empty rows/nd/User-Agent values.
  export ROWS ND USER_AGENT F_COOKIES
  # The page number is passed as a positional argument rather than spliced
  # into the command string; -n is dropped because it conflicts with -I.
  seq 2 "$PAGES" \
    | xargs -I '{}' -P 5 bash -c 'fetchCalls "$1" "$2"' _ '{}' "$PAGES"
}
# Converts the `rows` array of every *.json file in the current directory to
# CSV lines and writes them all to $F_CALLS.
# Globals: F_CALLS (read)
# Outputs: a progress line on stdout; CSV data in $F_CALLS
function convertJson2Csv() {
  # Keep the loop variable out of the global scope.
  local file
  echo "Converting JSON to CSV..."
  for file in *.json; do
    # Emit each element of .rows as one CSV record built from its values.
    jq -r '.rows[] | [.[]] | @csv' "$file"
  done > "$F_CALLS"
}
# Packs the generated CSV into a gzip-compressed tar archive.
# Globals: F_TAR_GZ (read, archive path), F_CALLS (read, file to archive)
# Outputs: a progress line on stdout
function compressData() {
  echo "Compressing data..."
  # Quoted so paths containing spaces or glob characters stay single words.
  tar -czf "$F_TAR_GZ" "$F_CALLS"
}
# Removes the downloaded JSON pages and the cookie jar from the current
# directory.
# Globals: F_COOKIES (read)
# Outputs: a progress line on stdout
function cleanTempFiles() {
  echo "Cleaning temporary files..."
  # `--` ends option parsing; the quoted jar path stays a single argument.
  rm -f -- ./*.json "$F_COOKIES"
}
# Reports an error, removes temporary files and terminates the script.
# Arguments: $1 - error description (or the failing command's exit status
#                 when invoked from the ERR trap)
#            $2 - line number where the error occurred
# Outputs:   the error message on stderr
# Exits with status 1; never returns.
function errorHandler() {
  # Diagnostics go to stderr so they are not mixed into captured stdout.
  echo "Error: ($1) occurred on line $2" >&2
  cleanTempFiles
  exit 1
}
# Route every unhandled command failure through errorHandler, passing the
# failing exit status and the line number.
trap 'errorHandler $? $LINENO' ERR
# On Ctrl+C clean up and exit with 130 (128 + SIGINT) so callers see the
# interruption as a failure rather than a success.
trap "echo -e '\nTerminated by Ctrl+c'; cleanTempFiles; exit 130" INT

# Abort early when a required command is missing. `command -v` is a shell
# builtin, so the check itself needs no external tool.
for element in "${APP[@]}"; do
  if ! command -v "$element" > /dev/null 2>&1; then
    errorHandler "$element - Maybe it is not installed on the system. Sorry but I can't continue" "$LINENO"
  fi
done

# ${COOKIE+x} is empty only when COOKIE is unset; in that case session
# cookies have to be fetched before any listing request.
if [[ -z "${COOKIE+x}" ]]; then
  echo "Manual cookie is unset"
  getCookies
else
  echo "Manual cookie is set. Skipping get cookie step"
fi

# Pipeline: discover page count, download, convert, archive, clean up.
getNumPages
getAllCalls
convertJson2Csv
compressData
cleanTempFiles

110
scripts/concessions.sh Executable file
View File

@ -0,0 +1,110 @@
#!/usr/bin/env bash
# Downloads the "concs" (concessions) listing from www.infosubvenciones.es
# page by page as JSON, converts the rows to CSV, extracts a filtered subset
# into a second CSV and packs both into a .tar.gz archive.
# If the COOKIE environment variable is set it is sent as the Cookie header;
# otherwise session cookies are fetched first and stored in $F_COOKIES.

# -e: abort on unhandled errors; -E: functions inherit the ERR trap.
set -eE
# Value sent as the `rows` query parameter of every request.
ROWS=50000
# Total number of pages; overwritten by getNumPages from the first response.
PAGES=0
# First 13 characters of the epoch time in nanoseconds (i.e. milliseconds);
# sent as the `nd` query parameter.
ND=$(date +%s%N | cut -b1-13)
# User-Agent header sent with every request.
USER_AGENT='Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0'
# Full CSV, filtered CSV, output archive and cookie jar file names.
F_CONCESSIONS='concessions.csv'
F_LEGAL='legal.csv'
F_TAR_GZ='concessions.tar.gz'
F_COOKIES='cookies.txt'
# Commands that must be available; checked before any download starts.
APP=(wget jq csvgrep tar gzip)
# Requests the site's landing page once so that wget stores the session
# cookies in $F_COOKIES (later read back with --load-cookies).
# Globals: F_COOKIES (read), USER_AGENT (read)
# Outputs: a progress line on stdout; wget's own output is discarded.
function getCookies() {
  echo "Getting cookies..."
  # -O /dev/null: only the cookies matter. Without it wget saves the page as
  # index.html (index.html.1, ...) in the working directory on every run.
  wget -q \
    --ca-certificate=AC_Componentes_Informaticos_SHA256.crt \
    --keep-session-cookies \
    --save-cookies "$F_COOKIES" \
    --header "User-Agent: $USER_AGENT" \
    -O /dev/null \
    'https://www.infosubvenciones.es/' &> /dev/null
}
# Downloads one page of the concessions listing into page_NNN.json (NNN is
# the zero-padded page number).
# Arguments: $1 - page number to request
#            $2 - total number of pages (used only in the progress message)
# Globals:   COOKIE (read, optional), F_COOKIES, USER_AGENT, ND, ROWS (read)
# Outputs:   a progress line on stdout; the response body in page_NNN.json
function fetchConcessions() {
  local outputFile
  local -a auth
  # Assigned separately from `local` so a printf failure is not masked.
  outputFile=$(printf 'page_%03d.json' "$1")
  echo "Downloading page $1 of $2..."
  # ${COOKIE+x} is empty only when COOKIE is unset: use the cookie jar then,
  # otherwise send the manually supplied cookie as a header.
  if [[ -z "${COOKIE+x}" ]]; then
    auth=(--load-cookies "$F_COOKIES")
  else
    auth=(--header "Cookie: $COOKIE")
  fi
  wget -q \
    --ca-certificate=AC_Componentes_Informaticos_SHA256.crt \
    --keep-session-cookies \
    "${auth[@]}" \
    --header "User-Agent: $USER_AGENT" \
    -O "$outputFile" \
    "https://www.infosubvenciones.es/bdnstrans/busqueda?type=concs&_search=false&nd=$ND&rows=$ROWS&page=$1&sidx=8&sord=asc"
}
# Exported so the `bash -c` workers started by xargs can call it.
export -f fetchConcessions
# Downloads page 1 and reads the total page count and record count from it.
# Globals: PAGES (written with the `.total` field of the first response)
# Outputs: progress and totals on stdout
# Calls errorHandler when `.total` is not a positive integer.
function getNumPages() {
  local page=1
  local outputFile records
  # Assigned separately from `local` so a failing printf/jq is not masked by
  # the always-zero exit status of `local`.
  outputFile=$(printf 'page_%03d.json' "$page")
  echo "Getting number of pages..."
  fetchConcessions "$page" "$PAGES"
  PAGES=$(jq '.total' "$outputFile")
  records=$(jq '.records' "$outputFile")
  echo "Total pages: $PAGES"
  echo "Total records: $records"
  # Reject 0, negatives, "null", empty and non-integer values alike; a plain
  # `-le 0` test lets values such as 1.5 slip through.
  if [[ ! "$PAGES" =~ ^[1-9][0-9]*$ ]]; then
    errorHandler "Cannot get number of pages" "$LINENO"
  fi
}
# Downloads pages 2..PAGES, up to five at a time (page 1 is fetched earlier
# by getNumPages).
# Globals: PAGES (read); ROWS, ND, USER_AGENT, F_COOKIES (exported)
# Outputs: progress lines on stdout
function getAllConcessions() {
  echo "Downloading all concessions..."
  # Each worker is a separate `bash -c` process: it sees the exported
  # fetchConcessions function but only *exported* variables. Without this
  # export the workers would request pages with empty rows/nd/User-Agent.
  export ROWS ND USER_AGENT F_COOKIES
  # The page number is passed as a positional argument rather than spliced
  # into the command string; -n is dropped because it conflicts with -I.
  seq 2 "$PAGES" \
    | xargs -I '{}' -P 5 bash -c 'fetchConcessions "$1" "$2"' _ '{}' "$PAGES"
}
# Converts the `rows` array of every *.json file in the current directory to
# CSV lines and writes them all to $F_CONCESSIONS.
# Globals: F_CONCESSIONS (read)
# Outputs: a progress line on stdout; CSV data in $F_CONCESSIONS
function convertJson2Csv() {
  # Keep the loop variable out of the global scope.
  local file
  echo "Converting JSON to CSV..."
  for file in *.json; do
    # Emit each element of .rows as one CSV record built from its values.
    jq -r '.rows[] | [.[]] | @csv' "$file"
  done > "$F_CONCESSIONS"
}
# Copies to $F_LEGAL the rows of $F_CONCESSIONS whose 10th column matches
# the pattern below (input is treated as having no header row).
# NOTE(review): the pattern (one letter of A-H, J, P-S, U, V; seven digits;
# a digit or A-J; a space) presumably identifies legal-entity tax IDs —
# confirm against the data source.
# Globals: F_CONCESSIONS (read), F_LEGAL (read, output path)
# Outputs: a progress line on stdout; filtered CSV in $F_LEGAL
function getAllLegal() {
  echo "Extracting legal concessions..."
  # Paths are quoted so names with spaces stay single arguments.
  csvgrep --no-header-row \
    --columns 10 \
    --regex '^[A-HJP-SUV]\d{7}[0-9A-J] ' \
    "$F_CONCESSIONS" > "$F_LEGAL"
}
# Packs both generated CSV files into a gzip-compressed tar archive.
# Globals: F_TAR_GZ (read, archive path); F_CONCESSIONS, F_LEGAL (read)
# Outputs: a progress line on stdout
function compressData() {
  echo "Compressing data..."
  # Quoted so paths containing spaces or glob characters stay single words.
  tar -czf "$F_TAR_GZ" "$F_CONCESSIONS" "$F_LEGAL"
}
# Removes the downloaded JSON pages and the cookie jar from the current
# directory.
# Globals: F_COOKIES (read)
# Outputs: a progress line on stdout
function cleanTempFiles() {
  echo "Cleaning temporary files..."
  # `--` ends option parsing; the quoted jar path stays a single argument.
  rm -f -- ./*.json "$F_COOKIES"
}
# Reports an error, removes temporary files and terminates the script.
# Arguments: $1 - error description (or the failing command's exit status
#                 when invoked from the ERR trap)
#            $2 - line number where the error occurred
# Outputs:   the error message on stderr
# Exits with status 1; never returns.
function errorHandler() {
  # Diagnostics go to stderr so they are not mixed into captured stdout.
  echo "Error: ($1) occurred on line $2" >&2
  cleanTempFiles
  exit 1
}
# Route every unhandled command failure through errorHandler, passing the
# failing exit status and the line number.
trap 'errorHandler $? $LINENO' ERR
# On Ctrl+C clean up and exit with 130 (128 + SIGINT) so callers see the
# interruption as a failure rather than a success.
trap "echo -e '\nTerminated by Ctrl+c'; cleanTempFiles; exit 130" INT

# Abort early when a required command is missing. `command -v` is a shell
# builtin, so the check itself needs no external tool.
for element in "${APP[@]}"; do
  if ! command -v "$element" > /dev/null 2>&1; then
    errorHandler "$element - Maybe it is not installed on the system. Sorry but I can't continue" "$LINENO"
  fi
done

# ${COOKIE+x} is empty only when COOKIE is unset; in that case session
# cookies have to be fetched before any listing request.
if [[ -z "${COOKIE+x}" ]]; then
  echo "Manual cookie is unset"
  getCookies
else
  echo "Manual cookie is set. Skipping get cookie step"
fi

# Pipeline: discover page count, download, convert, filter, archive, clean up.
getNumPages
getAllConcessions
convertJson2Csv
getAllLegal
compressData
cleanTempFiles