Compare commits
25 Commits
main
...
feature/do
Author | SHA1 | Date |
---|---|---|
Amab | bcec4f36d7 | |
Amab | 9bf87100ba | |
Amab | 51f3425150 | |
Amab | c236cdb420 | |
Amab | 2abd83dcd7 | |
Amab | fbc9bbc3b4 | |
Amab | b7f53cfd92 | |
Amab | c90d00b005 | |
Amab | ce76d4c0cb | |
Amab | f341b4eaa4 | |
Amab | 3094902af1 | |
Amab | 3c10992282 | |
Amab | c9f0af7dce | |
Amab | 8b53295dbb | |
Amab | c50ecac1de | |
Amab | b50c796556 | |
Amab | dec20b2659 | |
Amab | 9b6c7b7d36 | |
Amab | ac9eeb026a | |
Amab | 68c5e4acb2 | |
Amab | 1934626a34 | |
Amab | 37d9dc78dd | |
Amab | d90c5c21b1 | |
Amab | 3cc666e4d8 | |
Amab | a38da53f8a |
|
@ -0,0 +1,35 @@
|
||||||
|
kind: pipeline
name: default

steps:
  # Runs scripts/concessions.sh; csvkit is installed with pip because the
  # script calls csvgrep.
  - name: Download concessions
    image: python:alpine
    environment:
      COOKIE:
        from_secret: cookie
    commands:
      - apk update && apk add wget jq tar gzip bash util-linux
      - pip3 install csvkit
      - cd scripts
      - chmod +x ./*.sh
      - ./concessions.sh

  # Runs scripts/calls.sh.
  - name: Download calls
    image: python:alpine
    environment:
      COOKIE:
        from_secret: cookie
    commands:
      - apk update && apk add wget jq tar gzip bash util-linux
      - cd scripts
      - chmod +x ./*.sh
      - ./calls.sh

  # Publishes the archives once both download steps have finished.
  - name: Release data
    image: plugins/gitea-release
    settings:
      api_key:
        from_secret: api_key
      base_url: https://git.cuernodehipnos.es
      # Both download steps `cd scripts` before running, so the archives are
      # written to scripts/. The original root-level glob is kept as well.
      # NOTE(review): confirm where the *.tar.gz files end up in the workspace.
      files:
        - ./*.tar.gz
        - scripts/*.tar.gz
    depends_on:
      - Download concessions
      - Download calls
|
|
@ -1,2 +1,93 @@
|
||||||
.DS_Store
|
# Created by https://www.toptal.com/developers/gitignore/api/intellij+all
|
||||||
|
# Edit at https://www.toptal.com/developers/gitignore?templates=intellij+all
|
||||||
|
|
||||||
|
### Intellij+all ###
|
||||||
|
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
|
||||||
|
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
||||||
|
|
||||||
|
# User-specific stuff
|
||||||
|
.idea/**/workspace.xml
|
||||||
|
.idea/**/tasks.xml
|
||||||
|
.idea/**/usage.statistics.xml
|
||||||
|
.idea/**/dictionaries
|
||||||
|
.idea/**/shelf
|
||||||
|
|
||||||
|
# AWS User-specific
|
||||||
|
.idea/**/aws.xml
|
||||||
|
|
||||||
|
# Generated files
|
||||||
|
.idea/**/contentModel.xml
|
||||||
|
|
||||||
|
# Sensitive or high-churn files
|
||||||
|
.idea/**/dataSources/
|
||||||
|
.idea/**/dataSources.ids
|
||||||
|
.idea/**/dataSources.local.xml
|
||||||
|
.idea/**/sqlDataSources.xml
|
||||||
|
.idea/**/dynamic.xml
|
||||||
|
.idea/**/uiDesigner.xml
|
||||||
|
.idea/**/dbnavigator.xml
|
||||||
|
|
||||||
|
# Gradle
|
||||||
|
.idea/**/gradle.xml
|
||||||
|
.idea/**/libraries
|
||||||
|
|
||||||
|
# Gradle and Maven with auto-import
|
||||||
|
# When using Gradle or Maven with auto-import, you should exclude module files,
|
||||||
|
# since they will be recreated, and may cause churn. Uncomment if using
|
||||||
|
# auto-import.
|
||||||
|
# .idea/artifacts
|
||||||
|
# .idea/compiler.xml
|
||||||
|
# .idea/jarRepositories.xml
|
||||||
|
# .idea/modules.xml
|
||||||
|
# .idea/*.iml
|
||||||
|
# .idea/modules
|
||||||
|
# *.iml
|
||||||
|
# *.ipr
|
||||||
|
|
||||||
|
# CMake
|
||||||
|
cmake-build-*/
|
||||||
|
|
||||||
|
# Mongo Explorer plugin
|
||||||
|
.idea/**/mongoSettings.xml
|
||||||
|
|
||||||
|
# File-based project format
|
||||||
|
*.iws
|
||||||
|
|
||||||
|
# IntelliJ
|
||||||
|
out/
|
||||||
|
|
||||||
|
# mpeltonen/sbt-idea plugin
|
||||||
|
.idea_modules/
|
||||||
|
|
||||||
|
# JIRA plugin
|
||||||
|
atlassian-ide-plugin.xml
|
||||||
|
|
||||||
|
# Cursive Clojure plugin
|
||||||
|
.idea/replstate.xml
|
||||||
|
|
||||||
|
# SonarLint plugin
|
||||||
|
.idea/sonarlint/
|
||||||
|
|
||||||
|
# Crashlytics plugin (for Android Studio and IntelliJ)
|
||||||
|
com_crashlytics_export_strings.xml
|
||||||
|
crashlytics.properties
|
||||||
|
crashlytics-build.properties
|
||||||
|
fabric.properties
|
||||||
|
|
||||||
|
# Editor-based Rest Client
|
||||||
|
.idea/httpRequests
|
||||||
|
|
||||||
|
# Android studio 3.1+ serialized cache file
|
||||||
|
.idea/caches/build_file_checksums.ser
|
||||||
|
|
||||||
|
### Intellij+all Patch ###
|
||||||
|
# Ignore everything but code style settings and run configurations
|
||||||
|
# that are supposed to be shared within teams.
|
||||||
|
|
||||||
|
.idea/*
|
||||||
|
|
||||||
|
!.idea/codeStyles
|
||||||
|
!.idea/runConfigurations
|
||||||
|
|
||||||
|
# End of https://www.toptal.com/developers/gitignore/api/intellij+all
|
||||||
|
/.idea/codeStyles/codeStyleConfig.xml
|
||||||
|
|
|
@ -0,0 +1,39 @@
|
||||||
|
-----BEGIN CERTIFICATE-----
|
||||||
|
MIIG1jCCBL6gAwIBAgIQNMarBE42mRJRyCULbJTWwDANBgkqhkiG9w0BAQsFADA7
|
||||||
|
MQswCQYDVQQGEwJFUzERMA8GA1UECgwIRk5NVC1SQ00xGTAXBgNVBAsMEEFDIFJB
|
||||||
|
SVogRk5NVC1SQ00wHhcNMTMwNjI0MTA1MjU5WhcNMjgwNjI0MTA1MjU5WjBHMQsw
|
||||||
|
CQYDVQQGEwJFUzERMA8GA1UECgwIRk5NVC1SQ00xJTAjBgNVBAsMHEFDIENvbXBv
|
||||||
|
bmVudGVzIEluZm9ybcOhdGljb3MwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK
|
||||||
|
AoIBAQCXVx8rdbF7/xY44CaSqzzGo5BhvzA8knxC/3KJYVzTf+CkOvMxMUDub8b0
|
||||||
|
h38MDujm/RKZhBNOWbKhxF3U61ZVhcR9xOCciuS/soT80m3BByxAKcZsNka0jCA4
|
||||||
|
XRkglDaAFxCHEZ06MOnvXsSOZDfPYahbQ3VFCVycJuhlHdAwSpmceQwcRYkR6YgX
|
||||||
|
wTiyzCNGivMKAmRS3dItqDOmDW/nxiDFq/Jd8VWY7GFkwbbAeqYId8FjN8zfvafu
|
||||||
|
nsB9SLFkUjPPMeqfmC7Bdh7HMxLpaOXROwH201cmlebiPkn0xSFxXFqwhhr6yN8U
|
||||||
|
QYZ3O/+xdHLrS6DS9+CJUF6d09ijAgMBAAGjggLIMIICxDASBgNVHRMBAf8ECDAG
|
||||||
|
AQH/AgEAMA4GA1UdDwEB/wQEAwIBBjAdBgNVHQ4EFgQUGfhYLxTWpsybBJgIDUzX
|
||||||
|
qwCng2UwgZgGCCsGAQUFBwEBBIGLMIGIMEkGCCsGAQUFBzABhj1odHRwOi8vb2Nz
|
||||||
|
cGZubXRyY21jYS5jZXJ0LmZubXQuZXMvb2NzcGZubXRyY21jYS9PY3NwUmVzcG9u
|
||||||
|
ZGVyMDsGCCsGAQUFBzAChi9odHRwOi8vd3d3LmNlcnQuZm5tdC5lcy9jZXJ0cy9B
|
||||||
|
Q1JBSVpGTk1UUkNNLmNydDAfBgNVHSMEGDAWgBT3fcX9xOiaG3dkp/UdoMy/h2Ca
|
||||||
|
bTCB6wYDVR0gBIHjMIHgMIHdBgRVHSAAMIHUMCkGCCsGAQUFBwIBFh1odHRwOi8v
|
||||||
|
d3d3LmNlcnQuZm5tdC5lcy9kcGNzLzCBpgYIKwYBBQUHAgIwgZkMgZZTdWpldG8g
|
||||||
|
YSBsYXMgY29uZGljaW9uZXMgZGUgdXNvIGV4cHVlc3RhcyBlbiBsYSBEZWNsYXJh
|
||||||
|
Y2nDs24gZGUgUHLDoWN0aWNhcyBkZSBDZXJ0aWZpY2FjacOzbiBkZSBsYSBGTk1U
|
||||||
|
LVJDTSAoIEMvIEpvcmdlIEp1YW4sIDEwNi0yODAwOS1NYWRyaWQtRXNwYcOxYSkw
|
||||||
|
gdQGA1UdHwSBzDCByTCBxqCBw6CBwIaBkGxkYXA6Ly9sZGFwZm5tdC5jZXJ0LmZu
|
||||||
|
bXQuZXMvQ049Q1JMLE9VPUFDJTIwUkFJWiUyMEZOTVQtUkNNLE89Rk5NVC1SQ00s
|
||||||
|
Qz1FUz9hdXRob3JpdHlSZXZvY2F0aW9uTGlzdDtiaW5hcnk/YmFzZT9vYmplY3Rj
|
||||||
|
bGFzcz1jUkxEaXN0cmlidXRpb25Qb2ludIYraHR0cDovL3d3dy5jZXJ0LmZubXQu
|
||||||
|
ZXMvY3Jscy9BUkxGTk1UUkNNLmNybDANBgkqhkiG9w0BAQsFAAOCAgEAo2bsQ2xL
|
||||||
|
Dcyodieqjd+uy/lfxDw/MbrAq/ZaNFkIlcypUYamOM4vrm5rz8oLjPCoLkJ48P+n
|
||||||
|
P08Gkcl5Q6q6VFcZLia+U3gfHXrkyqToQlrtViGCGH3xA4u56XtMHGXSdk9vQ0yD
|
||||||
|
nW5f7bUEkp+uvcKewrOvNcpbIAgD4eU7gdOS0w7BagcFRBgTKBw2s3z73fRZtouJ
|
||||||
|
g/atmWYtXbBsfNjph+pCh+h5sbSyZUVzO5AemyjpYYYNMWDQrTXq+7O8zIPuPaNE
|
||||||
|
SjEexuzn+VjHG90RlUK1LygARi+Ir0opD2w6erb/hK8Eea7MFdKQ2ASqNBGJggNo
|
||||||
|
5vfPVvjHiL+Antmh7mQSKL+4YwFU64d4KK9k0C1mbJethDQFKcjTK1vMvnXFiups
|
||||||
|
IuyTqwKauo7u2zMKzY4r3VYOW9TpMyLPFIY8pII5GyNzXlL0F4nscOvduTEPEYqx
|
||||||
|
eNJfpDDPY/DO8WfxgdRTy2W3D/UoAulb+Y+nuzGGCtFQrsSMQX487R+aY0nWot/h
|
||||||
|
ajef6BcPuxhDfQrg5IafrISVmcJAplb3tXhh0sz7RbYz6jf1bke4eU5fnrTMtGlV
|
||||||
|
teUL2vjrfUPHW07kBJuaQ7sxORNV3bpHisOnHj+AriQzCn5vINpSHW6hTm7IfRkb
|
||||||
|
ltu/aQrsMuUhP7HE/v+uXe5CuboV5ubZhHU=
|
||||||
|
-----END CERTIFICATE-----
|
|
@ -0,0 +1,99 @@
|
||||||
|
#!/usr/bin/env bash
#
# Downloads the "calls" listing (type=convs) from www.infosubvenciones.es
# page by page, converts the JSON pages to CSV and packs the CSV into
# calls.tar.gz.
#
# The wget calls reference AC_Componentes_Informaticos_SHA256.crt by a
# relative path, so the script has to be started from the directory that
# contains that certificate.
#
# Optional environment:
#   COOKIE  Value for the "Cookie:" request header. When set, the
#           session-cookie bootstrap request is skipped.

# -e: stop on unhandled errors; -E: functions inherit the ERR trap;
# pipefail: a pipeline fails when any of its stages fails.
set -eE -o pipefail

ROWS=50000000                  # value of the "rows" query parameter
PAGES=0                        # page count, overwritten by getNumPages
ND=$(date +%s%N | cut -b1-13)  # epoch seconds + nanoseconds cut to 13 digits
USER_AGENT='Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0'
F_CALLS='calls.csv'            # CSV output
F_TAR_GZ='calls.tar.gz'        # archive produced from the CSV
F_COOKIES='cookies.txt'        # wget cookie jar
APP=(wget jq tar gzip)         # external tools checked before running
|
||||||
|
|
||||||
|
#######################################
# Requests the site's front page and stores the session cookies it sets.
# Globals:   F_COOKIES (read) - cookie jar to write
#            USER_AGENT (read)
# Outputs:   progress message on stdout; wget output is discarded
# Returns:   wget's exit status
#######################################
function getCookies() {
  echo "Getting cookies..."

  # An array keeps every option a separate word even if a value has spaces.
  local -a wgetArgs=(
    -q
    --ca-certificate=AC_Componentes_Informaticos_SHA256.crt
    --keep-session-cookies
    --save-cookies "$F_COOKIES"
    --header "User-Agent: $USER_AGENT"
  )

  wget "${wgetArgs[@]}" 'https://www.infosubvenciones.es/' &> /dev/null
}
|
||||||
|
|
||||||
|
#######################################
# Downloads one page of the calls listing into page_NNN.json.
# Globals:   ND, ROWS, USER_AGENT (read)
#            COOKIE (read, optional) - sent as "Cookie:" header when set
#            F_COOKIES (read) - cookie jar used when COOKIE is unset
# Arguments: $1 - page number to request
#            $2 - total number of pages (progress message only)
# Outputs:   progress message on stdout; writes page_NNN.json
# Returns:   non-zero if $1 is not a number or wget fails
#######################################
function fetchCalls() {
  # Declared separately so a printf failure is not hidden by `local`.
  local outputFile
  outputFile="page_$(printf %03d "$1").json"

  echo "Downloading page $1 of $2..."

  local url="https://www.infosubvenciones.es/bdnstrans/busqueda?type=convs&_search=false&nd=$ND&rows=$ROWS&page=$1&sidx=4&sord=asc"
  local -a wgetArgs=(
    -q
    --ca-certificate=AC_Componentes_Informaticos_SHA256.crt
    --keep-session-cookies
  )

  # Only the cookie source differs between the two modes.
  if [[ -z ${COOKIE+x} ]]; then
    wgetArgs+=(--load-cookies "$F_COOKIES")
  else
    wgetArgs+=(--header "Cookie: $COOKIE")
  fi

  wgetArgs+=(--header "User-Agent: $USER_AGENT" -O "$outputFile")

  wget "${wgetArgs[@]}" "$url"
}
# Exported so that the `bash -c` workers started through xargs can call it.
export -f fetchCalls
|
||||||
|
|
||||||
|
#######################################
# Downloads page 1 and reads the page and record counts from it.
# Globals:   PAGES (written) - set to the ".total" field of page 1
# Outputs:   progress and totals on stdout
# Returns:   calls errorHandler when ".total" is not a positive integer
#######################################
function getNumPages() {
  local page=1
  local outputFile
  outputFile="page_$(printf %03d "$page").json"

  echo "Getting number of pages..."

  fetchCalls "$page" "$PAGES"

  PAGES=$(jq '.total' "$outputFile")
  # Declared separately so a jq failure is not hidden by `local`.
  local records
  records=$(jq '.records' "$outputFile")

  echo "Total pages: $PAGES"
  echo "Total records: $records"

  # Explicit format check: anything that is not a positive integer
  # (for example jq printing "null") is treated as a failure.
  if [[ ! $PAGES =~ ^[1-9][0-9]*$ ]]; then
    errorHandler "Cannot get number of pages" "$LINENO"
  fi
}
|
||||||
|
|
||||||
|
#######################################
# Downloads pages 2..PAGES, up to five at a time.
# Globals:   PAGES (read)
#            ROWS, ND, USER_AGENT, F_COOKIES (exported for the workers)
# Outputs:   progress messages on stdout
# Returns:   non-zero if any worker fails (xargs exit status)
#######################################
function getAllCalls() {
  echo "Downloading all calls..."

  # Each worker is a fresh `bash -c` process. It inherits the exported
  # fetchCalls function, but plain shell variables are not inherited, so
  # the settings fetchCalls reads must be exported as well.
  export ROWS ND USER_AGENT F_COOKIES

  # The page number is handed over as a positional argument instead of
  # being substituted into the command string; -I already runs one
  # command per input line, so no -n is needed.
  seq 2 "$PAGES" \
    | xargs -P 5 -I '{}' bash -c 'fetchCalls "$1" "$2"' _ '{}' "$PAGES"
}
|
||||||
|
|
||||||
|
#######################################
# Turns every *.json file in the current directory into CSV rows.
# Each element of ".rows" becomes one CSV line made of that element's values.
# Globals:   F_CALLS (read) - CSV file to (over)write
# Outputs:   progress message on stdout; writes $F_CALLS
#######################################
function convertJson2Csv() {
  echo "Converting JSON to CSV..."

  # `local` keeps the loop variable out of the global scope.
  local file
  for file in *.json; do
    jq -r '.rows[] | [.[]] | @csv' "$file"
  done > "$F_CALLS"
}
|
||||||
|
|
||||||
|
#######################################
# Packs the CSV file into a gzip-compressed tar archive.
# Globals:   F_TAR_GZ (read) - archive to create
#            F_CALLS (read)  - file to add
# Outputs:   progress message on stdout; writes $F_TAR_GZ
#######################################
function compressData() {
  echo "Compressing data..."

  # Quoted so each file name stays a single argument.
  tar -czf "$F_TAR_GZ" "$F_CALLS"
}
|
||||||
|
|
||||||
|
#######################################
# Removes the downloaded JSON pages and the cookie jar.
# Globals:   F_COOKIES (read)
# Outputs:   progress message on stdout
#######################################
function cleanTempFiles() {
  echo "Cleaning temporary files..."

  # -f: missing files are not an error, so this is safe to call repeatedly.
  rm -f ./*.json "$F_COOKIES"
}
|
||||||
|
|
||||||
|
#######################################
# Reports an error, removes temporary files and terminates the script.
# Arguments: $1 - error description or exit status
#            $2 - line number where the error was detected
# Outputs:   error message on stderr
# Returns:   never returns; exits with status 1
#######################################
function errorHandler() {
  # Diagnostics go to stderr so they do not mix with regular output.
  echo "Error: ($1) occurred on line $2" >&2
  cleanTempFiles
  exit 1
}
|
||||||
|
|
||||||
|
# Any unhandled failure: report its exit status and line, clean up, exit 1.
trap 'errorHandler "$?" "$LINENO"' ERR
# Ctrl+C: clean up and exit with 130 (128 + SIGINT). A bare `exit` would
# return cleanTempFiles' status, so an interrupted run would look successful.
trap "echo -e '\nTerminated by Ctrl+c'; cleanTempFiles; exit 130" INT

# Stop early when a required external tool cannot be found on PATH.
for element in "${APP[@]}"; do
  if ! command -v "$element" > /dev/null 2>&1; then
    errorHandler "$element - Maybe it is not installed on the system. Sorry but I can't continue" "$LINENO"
  fi
done

# Without a caller-supplied COOKIE, obtain session cookies from the site.
if [[ -z ${COOKIE+x} ]]; then
  echo "Manual cookie is unset"
  getCookies
else
  echo "Manual cookie is set. Skipping get cookie step"
fi

getNumPages
getAllCalls
convertJson2Csv
compressData
cleanTempFiles
|
|
@ -0,0 +1,110 @@
|
||||||
|
#!/usr/bin/env bash
#
# Downloads the "concessions" listing (type=concs) from
# www.infosubvenciones.es page by page, converts the JSON pages to CSV,
# writes the rows selected by getAllLegal to a second CSV and packs both
# files into concessions.tar.gz.
#
# The wget calls reference AC_Componentes_Informaticos_SHA256.crt by a
# relative path, so the script has to be started from the directory that
# contains that certificate.
#
# Optional environment:
#   COOKIE  Value for the "Cookie:" request header. When set, the
#           session-cookie bootstrap request is skipped.

# -e: stop on unhandled errors; -E: functions inherit the ERR trap;
# pipefail: a pipeline fails when any of its stages fails.
set -eE -o pipefail

ROWS=50000                       # value of the "rows" query parameter
PAGES=0                          # page count, overwritten by getNumPages
ND=$(date +%s%N | cut -b1-13)    # epoch seconds + nanoseconds cut to 13 digits
USER_AGENT='Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0'
F_CONCESSIONS='concessions.csv'  # CSV with every downloaded row
F_LEGAL='legal.csv'              # CSV with the rows selected by getAllLegal
F_TAR_GZ='concessions.tar.gz'    # archive produced from both CSV files
F_COOKIES='cookies.txt'          # wget cookie jar
APP=(wget jq csvgrep tar gzip)   # external tools checked before running
|
||||||
|
|
||||||
|
#######################################
# Requests the site's front page and stores the session cookies it sets.
# Globals:   F_COOKIES (read) - cookie jar to write
#            USER_AGENT (read)
# Outputs:   progress message on stdout; wget output is discarded
# Returns:   wget's exit status
#######################################
function getCookies() {
  echo "Getting cookies..."

  # An array keeps every option a separate word even if a value has spaces.
  local -a wgetArgs=(
    -q
    --ca-certificate=AC_Componentes_Informaticos_SHA256.crt
    --keep-session-cookies
    --save-cookies "$F_COOKIES"
    --header "User-Agent: $USER_AGENT"
  )

  wget "${wgetArgs[@]}" 'https://www.infosubvenciones.es/' &> /dev/null
}
|
||||||
|
|
||||||
|
#######################################
# Downloads one page of the concessions listing into page_NNN.json.
# Globals:   ND, ROWS, USER_AGENT (read)
#            COOKIE (read, optional) - sent as "Cookie:" header when set
#            F_COOKIES (read) - cookie jar used when COOKIE is unset
# Arguments: $1 - page number to request
#            $2 - total number of pages (progress message only)
# Outputs:   progress message on stdout; writes page_NNN.json
# Returns:   non-zero if $1 is not a number or wget fails
#######################################
function fetchConcessions() {
  # Declared separately so a printf failure is not hidden by `local`.
  local outputFile
  outputFile="page_$(printf %03d "$1").json"

  echo "Downloading page $1 of $2..."

  local url="https://www.infosubvenciones.es/bdnstrans/busqueda?type=concs&_search=false&nd=$ND&rows=$ROWS&page=$1&sidx=8&sord=asc"
  local -a wgetArgs=(
    -q
    --ca-certificate=AC_Componentes_Informaticos_SHA256.crt
    --keep-session-cookies
  )

  # Only the cookie source differs between the two modes.
  if [[ -z ${COOKIE+x} ]]; then
    wgetArgs+=(--load-cookies "$F_COOKIES")
  else
    wgetArgs+=(--header "Cookie: $COOKIE")
  fi

  wgetArgs+=(--header "User-Agent: $USER_AGENT" -O "$outputFile")

  wget "${wgetArgs[@]}" "$url"
}
# Exported so that the `bash -c` workers started through xargs can call it.
export -f fetchConcessions
|
||||||
|
|
||||||
|
#######################################
# Downloads page 1 and reads the page and record counts from it.
# Globals:   PAGES (written) - set to the ".total" field of page 1
# Outputs:   progress and totals on stdout
# Returns:   calls errorHandler when ".total" is not a positive integer
#######################################
function getNumPages() {
  local page=1
  local outputFile
  outputFile="page_$(printf %03d "$page").json"

  echo "Getting number of pages..."

  fetchConcessions "$page" "$PAGES"

  PAGES=$(jq '.total' "$outputFile")
  # Declared separately so a jq failure is not hidden by `local`.
  local records
  records=$(jq '.records' "$outputFile")

  echo "Total pages: $PAGES"
  echo "Total records: $records"

  # Explicit format check: anything that is not a positive integer
  # (for example jq printing "null") is treated as a failure.
  if [[ ! $PAGES =~ ^[1-9][0-9]*$ ]]; then
    errorHandler "Cannot get number of pages" "$LINENO"
  fi
}
|
||||||
|
|
||||||
|
#######################################
# Downloads pages 2..PAGES, up to five at a time.
# Globals:   PAGES (read)
#            ROWS, ND, USER_AGENT, F_COOKIES (exported for the workers)
# Outputs:   progress messages on stdout
# Returns:   non-zero if any worker fails (xargs exit status)
#######################################
function getAllConcessions() {
  echo "Downloading all concessions..."

  # Each worker is a fresh `bash -c` process. It inherits the exported
  # fetchConcessions function, but plain shell variables are not inherited,
  # so the settings fetchConcessions reads must be exported as well.
  export ROWS ND USER_AGENT F_COOKIES

  # The page number is handed over as a positional argument instead of
  # being substituted into the command string; -I already runs one
  # command per input line, so no -n is needed.
  seq 2 "$PAGES" \
    | xargs -P 5 -I '{}' bash -c 'fetchConcessions "$1" "$2"' _ '{}' "$PAGES"
}
|
||||||
|
|
||||||
|
#######################################
# Turns every *.json file in the current directory into CSV rows.
# Each element of ".rows" becomes one CSV line made of that element's values.
# Globals:   F_CONCESSIONS (read) - CSV file to (over)write
# Outputs:   progress message on stdout; writes $F_CONCESSIONS
#######################################
function convertJson2Csv() {
  echo "Converting JSON to CSV..."

  # `local` keeps the loop variable out of the global scope.
  local file
  for file in *.json; do
    jq -r '.rows[] | [.[]] | @csv' "$file"
  done > "$F_CONCESSIONS"
}
|
||||||
|
|
||||||
|
#######################################
# Copies the rows of the concessions CSV whose 10th column matches the
# identifier pattern below into a separate CSV.
# The pattern is: one letter from A-H, J, P-S, U or V, seven digits, one
# character from 0-9 or A-J, then a space.
# NOTE(review): presumably the identifier format of legal entities - confirm.
# Globals:   F_CONCESSIONS (read) - input CSV (treated as having no header)
#            F_LEGAL (read)       - output CSV to (over)write
# Outputs:   progress message on stdout; writes $F_LEGAL
#######################################
function getAllLegal() {
  echo "Extracting legal concessions..."

  # File names are quoted so they stay single words.
  csvgrep --no-header-row \
    --columns 10 \
    --regex '^[A-HJP-SUV]\d{7}[0-9A-J] ' \
    "$F_CONCESSIONS" > "$F_LEGAL"
}
|
||||||
|
|
||||||
|
#######################################
# Packs both CSV files into a gzip-compressed tar archive.
# Globals:   F_TAR_GZ (read)      - archive to create
#            F_CONCESSIONS (read) - first file to add
#            F_LEGAL (read)       - second file to add
# Outputs:   progress message on stdout; writes $F_TAR_GZ
#######################################
function compressData() {
  echo "Compressing data..."

  # Quoted so each file name stays a single argument.
  tar -czf "$F_TAR_GZ" "$F_CONCESSIONS" "$F_LEGAL"
}
|
||||||
|
|
||||||
|
#######################################
# Removes the downloaded JSON pages and the cookie jar.
# Globals:   F_COOKIES (read)
# Outputs:   progress message on stdout
#######################################
function cleanTempFiles() {
  echo "Cleaning temporary files..."

  # -f: missing files are not an error, so this is safe to call repeatedly.
  rm -f ./*.json "$F_COOKIES"
}
|
||||||
|
|
||||||
|
#######################################
# Reports an error, removes temporary files and terminates the script.
# Arguments: $1 - error description or exit status
#            $2 - line number where the error was detected
# Outputs:   error message on stderr
# Returns:   never returns; exits with status 1
#######################################
function errorHandler() {
  # Diagnostics go to stderr so they do not mix with regular output.
  echo "Error: ($1) occurred on line $2" >&2
  cleanTempFiles
  exit 1
}
|
||||||
|
|
||||||
|
# Any unhandled failure: report its exit status and line, clean up, exit 1.
trap 'errorHandler "$?" "$LINENO"' ERR
# Ctrl+C: clean up and exit with 130 (128 + SIGINT). A bare `exit` would
# return cleanTempFiles' status, so an interrupted run would look successful.
trap "echo -e '\nTerminated by Ctrl+c'; cleanTempFiles; exit 130" INT

# Stop early when a required external tool cannot be found on PATH.
for element in "${APP[@]}"; do
  if ! command -v "$element" > /dev/null 2>&1; then
    errorHandler "$element - Maybe it is not installed on the system. Sorry but I can't continue" "$LINENO"
  fi
done

# Without a caller-supplied COOKIE, obtain session cookies from the site.
if [[ -z ${COOKIE+x} ]]; then
  echo "Manual cookie is unset"
  getCookies
else
  echo "Manual cookie is set. Skipping get cookie step"
fi

getNumPages
getAllConcessions
convertJson2Csv
getAllLegal
compressData
cleanTempFiles
|
Loading…
Reference in New Issue