Compare commits
25 Commits
main
...
feature/do
Author | SHA1 | Date |
---|---|---|
Amab | bcec4f36d7 | |
Amab | 9bf87100ba | |
Amab | 51f3425150 | |
Amab | c236cdb420 | |
Amab | 2abd83dcd7 | |
Amab | fbc9bbc3b4 | |
Amab | b7f53cfd92 | |
Amab | c90d00b005 | |
Amab | ce76d4c0cb | |
Amab | f341b4eaa4 | |
Amab | 3094902af1 | |
Amab | 3c10992282 | |
Amab | c9f0af7dce | |
Amab | 8b53295dbb | |
Amab | c50ecac1de | |
Amab | b50c796556 | |
Amab | dec20b2659 | |
Amab | 9b6c7b7d36 | |
Amab | ac9eeb026a | |
Amab | 68c5e4acb2 | |
Amab | 1934626a34 | |
Amab | 37d9dc78dd | |
Amab | d90c5c21b1 | |
Amab | 3cc666e4d8 | |
Amab | a38da53f8a |
|
@ -0,0 +1,35 @@
|
|||
# Drone CI pipeline: download both datasets, then publish the resulting
# archives as a Gitea release.
kind: pipeline
name: default

steps:
  - name: Download concessions
    image: python:alpine
    environment:
      # Session cookie handed to the script; when set, the script skips its
      # own cookie-fetching step.
      COOKIE:
        from_secret: cookie
    commands:
      - apk update && apk add wget jq tar gzip bash util-linux
      # csvkit provides csvgrep, which concessions.sh requires.
      - pip3 install csvkit
      - cd scripts
      - chmod +x ./*.sh
      - ./concessions.sh

  - name: Download calls
    image: python:alpine
    environment:
      COOKIE:
        from_secret: cookie
    commands:
      - apk update && apk add wget jq tar gzip bash util-linux
      - cd scripts
      - chmod +x ./*.sh
      - ./calls.sh
    # Both download scripts write page_NNN.json / cookies.txt into the same
    # scripts/ directory and delete ./*.json when they finish. Steps without
    # depends_on start in parallel once any step declares dependencies, so the
    # two downloads must be serialised to avoid clobbering each other's files.
    depends_on:
      - Download concessions

  - name: Release data
    image: plugins/gitea-release
    settings:
      api_key:
        from_secret: api_key
      base_url: https://git.cuernodehipnos.es
      # The scripts run after `cd scripts`, so the archives are created there,
      # not in the workspace root.
      files: scripts/*.tar.gz
    # NOTE(review): the gitea-release plugin is normally restricted to tag
    # events; confirm whether a `when: event: tag` condition is needed here.
    depends_on:
      - Download concessions
      - Download calls
|
|
@ -1,2 +1,93 @@
|
|||
.DS_Store
|
||||
# Created by https://www.toptal.com/developers/gitignore/api/intellij+all
|
||||
# Edit at https://www.toptal.com/developers/gitignore?templates=intellij+all
|
||||
|
||||
### Intellij+all ###
|
||||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
|
||||
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
||||
|
||||
# User-specific stuff
|
||||
.idea/**/workspace.xml
|
||||
.idea/**/tasks.xml
|
||||
.idea/**/usage.statistics.xml
|
||||
.idea/**/dictionaries
|
||||
.idea/**/shelf
|
||||
|
||||
# AWS User-specific
|
||||
.idea/**/aws.xml
|
||||
|
||||
# Generated files
|
||||
.idea/**/contentModel.xml
|
||||
|
||||
# Sensitive or high-churn files
|
||||
.idea/**/dataSources/
|
||||
.idea/**/dataSources.ids
|
||||
.idea/**/dataSources.local.xml
|
||||
.idea/**/sqlDataSources.xml
|
||||
.idea/**/dynamic.xml
|
||||
.idea/**/uiDesigner.xml
|
||||
.idea/**/dbnavigator.xml
|
||||
|
||||
# Gradle
|
||||
.idea/**/gradle.xml
|
||||
.idea/**/libraries
|
||||
|
||||
# Gradle and Maven with auto-import
|
||||
# When using Gradle or Maven with auto-import, you should exclude module files,
|
||||
# since they will be recreated, and may cause churn. Uncomment if using
|
||||
# auto-import.
|
||||
# .idea/artifacts
|
||||
# .idea/compiler.xml
|
||||
# .idea/jarRepositories.xml
|
||||
# .idea/modules.xml
|
||||
# .idea/*.iml
|
||||
# .idea/modules
|
||||
# *.iml
|
||||
# *.ipr
|
||||
|
||||
# CMake
|
||||
cmake-build-*/
|
||||
|
||||
# Mongo Explorer plugin
|
||||
.idea/**/mongoSettings.xml
|
||||
|
||||
# File-based project format
|
||||
*.iws
|
||||
|
||||
# IntelliJ
|
||||
out/
|
||||
|
||||
# mpeltonen/sbt-idea plugin
|
||||
.idea_modules/
|
||||
|
||||
# JIRA plugin
|
||||
atlassian-ide-plugin.xml
|
||||
|
||||
# Cursive Clojure plugin
|
||||
.idea/replstate.xml
|
||||
|
||||
# SonarLint plugin
|
||||
.idea/sonarlint/
|
||||
|
||||
# Crashlytics plugin (for Android Studio and IntelliJ)
|
||||
com_crashlytics_export_strings.xml
|
||||
crashlytics.properties
|
||||
crashlytics-build.properties
|
||||
fabric.properties
|
||||
|
||||
# Editor-based Rest Client
|
||||
.idea/httpRequests
|
||||
|
||||
# Android studio 3.1+ serialized cache file
|
||||
.idea/caches/build_file_checksums.ser
|
||||
|
||||
### Intellij+all Patch ###
|
||||
# Ignore everything but code style settings and run configurations
|
||||
# that are supposed to be shared within teams.
|
||||
|
||||
.idea/*
|
||||
|
||||
!.idea/codeStyles
|
||||
!.idea/runConfigurations
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/intellij+all
|
||||
/.idea/codeStyles/codeStyleConfig.xml
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
-----BEGIN CERTIFICATE-----
|
||||
MIIG1jCCBL6gAwIBAgIQNMarBE42mRJRyCULbJTWwDANBgkqhkiG9w0BAQsFADA7
|
||||
MQswCQYDVQQGEwJFUzERMA8GA1UECgwIRk5NVC1SQ00xGTAXBgNVBAsMEEFDIFJB
|
||||
SVogRk5NVC1SQ00wHhcNMTMwNjI0MTA1MjU5WhcNMjgwNjI0MTA1MjU5WjBHMQsw
|
||||
CQYDVQQGEwJFUzERMA8GA1UECgwIRk5NVC1SQ00xJTAjBgNVBAsMHEFDIENvbXBv
|
||||
bmVudGVzIEluZm9ybcOhdGljb3MwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK
|
||||
AoIBAQCXVx8rdbF7/xY44CaSqzzGo5BhvzA8knxC/3KJYVzTf+CkOvMxMUDub8b0
|
||||
h38MDujm/RKZhBNOWbKhxF3U61ZVhcR9xOCciuS/soT80m3BByxAKcZsNka0jCA4
|
||||
XRkglDaAFxCHEZ06MOnvXsSOZDfPYahbQ3VFCVycJuhlHdAwSpmceQwcRYkR6YgX
|
||||
wTiyzCNGivMKAmRS3dItqDOmDW/nxiDFq/Jd8VWY7GFkwbbAeqYId8FjN8zfvafu
|
||||
nsB9SLFkUjPPMeqfmC7Bdh7HMxLpaOXROwH201cmlebiPkn0xSFxXFqwhhr6yN8U
|
||||
QYZ3O/+xdHLrS6DS9+CJUF6d09ijAgMBAAGjggLIMIICxDASBgNVHRMBAf8ECDAG
|
||||
AQH/AgEAMA4GA1UdDwEB/wQEAwIBBjAdBgNVHQ4EFgQUGfhYLxTWpsybBJgIDUzX
|
||||
qwCng2UwgZgGCCsGAQUFBwEBBIGLMIGIMEkGCCsGAQUFBzABhj1odHRwOi8vb2Nz
|
||||
cGZubXRyY21jYS5jZXJ0LmZubXQuZXMvb2NzcGZubXRyY21jYS9PY3NwUmVzcG9u
|
||||
ZGVyMDsGCCsGAQUFBzAChi9odHRwOi8vd3d3LmNlcnQuZm5tdC5lcy9jZXJ0cy9B
|
||||
Q1JBSVpGTk1UUkNNLmNydDAfBgNVHSMEGDAWgBT3fcX9xOiaG3dkp/UdoMy/h2Ca
|
||||
bTCB6wYDVR0gBIHjMIHgMIHdBgRVHSAAMIHUMCkGCCsGAQUFBwIBFh1odHRwOi8v
|
||||
d3d3LmNlcnQuZm5tdC5lcy9kcGNzLzCBpgYIKwYBBQUHAgIwgZkMgZZTdWpldG8g
|
||||
YSBsYXMgY29uZGljaW9uZXMgZGUgdXNvIGV4cHVlc3RhcyBlbiBsYSBEZWNsYXJh
|
||||
Y2nDs24gZGUgUHLDoWN0aWNhcyBkZSBDZXJ0aWZpY2FjacOzbiBkZSBsYSBGTk1U
|
||||
LVJDTSAoIEMvIEpvcmdlIEp1YW4sIDEwNi0yODAwOS1NYWRyaWQtRXNwYcOxYSkw
|
||||
gdQGA1UdHwSBzDCByTCBxqCBw6CBwIaBkGxkYXA6Ly9sZGFwZm5tdC5jZXJ0LmZu
|
||||
bXQuZXMvQ049Q1JMLE9VPUFDJTIwUkFJWiUyMEZOTVQtUkNNLE89Rk5NVC1SQ00s
|
||||
Qz1FUz9hdXRob3JpdHlSZXZvY2F0aW9uTGlzdDtiaW5hcnk/YmFzZT9vYmplY3Rj
|
||||
bGFzcz1jUkxEaXN0cmlidXRpb25Qb2ludIYraHR0cDovL3d3dy5jZXJ0LmZubXQu
|
||||
ZXMvY3Jscy9BUkxGTk1UUkNNLmNybDANBgkqhkiG9w0BAQsFAAOCAgEAo2bsQ2xL
|
||||
Dcyodieqjd+uy/lfxDw/MbrAq/ZaNFkIlcypUYamOM4vrm5rz8oLjPCoLkJ48P+n
|
||||
P08Gkcl5Q6q6VFcZLia+U3gfHXrkyqToQlrtViGCGH3xA4u56XtMHGXSdk9vQ0yD
|
||||
nW5f7bUEkp+uvcKewrOvNcpbIAgD4eU7gdOS0w7BagcFRBgTKBw2s3z73fRZtouJ
|
||||
g/atmWYtXbBsfNjph+pCh+h5sbSyZUVzO5AemyjpYYYNMWDQrTXq+7O8zIPuPaNE
|
||||
SjEexuzn+VjHG90RlUK1LygARi+Ir0opD2w6erb/hK8Eea7MFdKQ2ASqNBGJggNo
|
||||
5vfPVvjHiL+Antmh7mQSKL+4YwFU64d4KK9k0C1mbJethDQFKcjTK1vMvnXFiups
|
||||
IuyTqwKauo7u2zMKzY4r3VYOW9TpMyLPFIY8pII5GyNzXlL0F4nscOvduTEPEYqx
|
||||
eNJfpDDPY/DO8WfxgdRTy2W3D/UoAulb+Y+nuzGGCtFQrsSMQX487R+aY0nWot/h
|
||||
ajef6BcPuxhDfQrg5IafrISVmcJAplb3tXhh0sz7RbYz6jf1bke4eU5fnrTMtGlV
|
||||
teUL2vjrfUPHW07kBJuaQ7sxORNV3bpHisOnHj+AriQzCn5vINpSHW6hTm7IfRkb
|
||||
ltu/aQrsMuUhP7HE/v+uXe5CuboV5ubZhHU=
|
||||
-----END CERTIFICATE-----
|
|
@ -0,0 +1,99 @@
|
|||
#!/usr/bin/env bash
# Downloads every page of "convs" search results from infosubvenciones.es,
# converts them to CSV and packs the CSV into a gzip-compressed tarball.
#
# Optional environment: COOKIE - when set, it is sent as the Cookie header
# and the automatic cookie-fetching step is skipped.

# -e: abort on the first failing command; -E: functions inherit the ERR trap.
set -eE

# Rows requested per page (sent as the `rows` query parameter).
ROWS=50000000
# Total number of pages; starts at 0 and is overwritten by getNumPages.
PAGES=0
# Epoch seconds + nanoseconds truncated to the first 13 bytes, sent as the
# `nd` query parameter. Relies on `date` supporting %N.
ND=$(date +%s%N | cut -b1-13)
# User-Agent header sent with every request.
USER_AGENT='Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0'
# Output CSV, output archive and cookie-jar file names (relative to the cwd).
F_CALLS='calls.csv'
F_TAR_GZ='calls.tar.gz'
F_COOKIES='cookies.txt'
# External commands whose presence is checked before any download starts.
APP=(wget jq tar gzip)
|
||||
|
||||
#######################################
# Visit the site's landing page and store the session cookies it sets.
# Globals:   F_COOKIES (read) - cookie-jar file to write
#            USER_AGENT (read)
# Outputs:   progress message on stdout; cookie jar written to F_COOKIES
# Returns:   wget's exit status
#######################################
function getCookies() {
  echo "Getting cookies..."

  # wget output is discarded on purpose; a failure still surfaces through
  # the exit status.
  wget -q \
    --ca-certificate=AC_Componentes_Informaticos_SHA256.crt \
    --keep-session-cookies \
    --save-cookies "$F_COOKIES" \
    --header "User-Agent: $USER_AGENT" \
    'https://www.infosubvenciones.es/' &> /dev/null
}
|
||||
|
||||
#######################################
# Download one page of "convs" search results into page_NNN.json.
# Globals:   COOKIE (read, optional) - sent as the Cookie header when set;
#              otherwise cookies are loaded from F_COOKIES
#            F_COOKIES, USER_AGENT, ND, ROWS (read)
# Arguments: $1 - page number to download
#            $2 - total number of pages (used only in the progress message)
# Outputs:   progress message on stdout; page_NNN.json in the cwd
# Returns:   wget's exit status
#######################################
function fetchCalls() {
  local outputFile
  printf -v outputFile 'page_%03d.json' "$1"

  echo "Downloading page $1 of $2..."

  # Only the cookie source differs between the two modes, so build that part
  # separately and share the rest of the command line.
  local -a cookieArgs
  if [[ -z ${COOKIE+x} ]]; then
    cookieArgs=(--load-cookies "$F_COOKIES")
  else
    cookieArgs=(--header "Cookie: $COOKIE")
  fi

  wget -q \
    --ca-certificate=AC_Componentes_Informaticos_SHA256.crt \
    --keep-session-cookies \
    "${cookieArgs[@]}" \
    --header "User-Agent: $USER_AGENT" \
    -O "$outputFile" \
    "https://www.infosubvenciones.es/bdnstrans/busqueda?type=convs&_search=false&nd=$ND&rows=$ROWS&page=$1&sidx=4&sord=asc"
}
# Exported so the function is available inside child bash processes.
export -f fetchCalls
|
||||
|
||||
#######################################
# Download page 1 and read the total page/record counts from it.
# Globals:   PAGES (written) - set to the `.total` field of page 1
# Outputs:   progress and totals on stdout; page_001.json in the cwd
# Returns:   calls errorHandler when the page count is missing, non-numeric
#            or not positive
#######################################
function getNumPages() {
  local page=1
  local outputFile
  local records
  printf -v outputFile 'page_%03d.json' "$page"

  echo "Getting number of pages..."

  fetchCalls "$page" "$PAGES"

  # Assign separately from `local` so a jq failure is not masked.
  PAGES=$(jq -r '.total' "$outputFile")
  records=$(jq -r '.records' "$outputFile")

  echo "Total pages: $PAGES"
  echo "Total records: $records"

  # Reject anything that is not a positive decimal integer (e.g. `null`
  # when the response lacks the field). 10# avoids octal interpretation.
  if [[ ! $PAGES =~ ^[0-9]+$ ]] || (( 10#$PAGES <= 0 )); then
    errorHandler "Cannot get number of pages" "$LINENO"
  fi
}
|
||||
|
||||
#######################################
# Download pages 2..PAGES, up to five at a time.
# Globals:   PAGES (read) - total number of pages
#            ND, ROWS, USER_AGENT, F_COOKIES (exported for the workers)
# Outputs:   progress messages on stdout; one page_NNN.json per page
# Returns:   non-zero when any worker fails
#######################################
function getAllCalls() {
  echo "Downloading all calls..."

  # Downloading starts at page 2, so a single-page result leaves nothing to
  # do. The guard also avoids `seq 2 1` counting downwards on BSD systems.
  if [[ $PAGES -lt 2 ]]; then
    return 0
  fi

  # Each worker is a separate bash process: it inherits the exported
  # fetchCalls function but only *exported* variables, so the settings the
  # function reads must be exported as well.
  export ND ROWS USER_AGENT F_COOKIES

  # Page number and total are passed as positional arguments instead of being
  # spliced into the command string.
  seq 2 "$PAGES" \
    | xargs -P 5 -I '{}' bash -c 'fetchCalls "$1" "$2"' _ '{}' "$PAGES"
}
|
||||
|
||||
#######################################
# Flatten the `.rows` array of every *.json file in the cwd into CSV lines.
# Globals:   F_CALLS (read) - CSV file to (over)write
# Outputs:   progress message on stdout; CSV written to F_CALLS
# Returns:   jq's exit status
#######################################
function convertJson2Csv() {
  echo "Converting JSON to CSV..."

  # jq accepts several input files, so one invocation replaces the per-file
  # loop; the glob expands in the same sorted order.
  jq -r '.rows[] | [.[]] | @csv' ./*.json > "$F_CALLS"
}
|
||||
|
||||
#######################################
# Pack the CSV file into a gzip-compressed tarball.
# Globals:   F_TAR_GZ (read) - archive to create
#            F_CALLS (read)  - file to archive
# Outputs:   progress message on stdout
# Returns:   tar's exit status
#######################################
function compressData() {
  echo "Compressing data..."

  tar -czf "$F_TAR_GZ" "$F_CALLS"
}
|
||||
|
||||
#######################################
# Remove the downloaded JSON pages and the cookie jar.
# Globals:   F_COOKIES (read) - cookie-jar file to delete
# Outputs:   progress message on stdout
# Note:      deletes every *.json file in the current directory.
#######################################
function cleanTempFiles() {
  echo "Cleaning temporary files..."

  # `--` stops option parsing so a name starting with '-' is not misread.
  rm -f -- ./*.json "$F_COOKIES"
}
|
||||
|
||||
#######################################
# Report an error, remove temporary files and terminate the script.
# Arguments: $1 - error description or exit status
#            $2 - line number where the error occurred
# Outputs:   error message on stderr
# Returns:   never returns; exits with status 1
#######################################
function errorHandler() {
  # Prevent the ERR trap from re-entering this handler if cleanup fails.
  trap - ERR

  echo "Error: ($1) occurred on line $2" >&2
  cleanTempFiles
  exit 1
}
|
||||
|
||||
# Any failing command reports its status and line number, then cleans up.
trap 'errorHandler $? $LINENO' ERR
# Ctrl+C: clean up and exit with an explicit 130 (128 + SIGINT) instead of
# an implicit status.
trap "echo -e '\nTerminated by Ctrl+c'; cleanTempFiles; exit 130" INT

# Refuse to start when a required command is not available on PATH.
for element in "${APP[@]}"; do
  if ! command -v "$element" > /dev/null 2>&1; then
    errorHandler "$element - Maybe it is not installed on the system. Sorry but I can't continue" "$LINENO"
  fi
done

# ${COOKIE+x} is empty only when COOKIE is unset; a COOKIE that is set but
# empty still counts as "set".
if [[ -z ${COOKIE+x} ]]; then
  echo "Manual cookie is unset"
  getCookies
else
  echo "Manual cookie is set. Skipping get cookie step"
fi

getNumPages
getAllCalls
convertJson2Csv
compressData
cleanTempFiles
|
|
@ -0,0 +1,110 @@
|
|||
#!/usr/bin/env bash
# Downloads every page of "concs" search results from infosubvenciones.es,
# converts them to CSV, extracts a filtered subset into a second CSV and
# packs both into a gzip-compressed tarball.
#
# Optional environment: COOKIE - when set, it is sent as the Cookie header
# and the automatic cookie-fetching step is skipped.

# -e: abort on the first failing command; -E: functions inherit the ERR trap.
set -eE

# Rows requested per page (sent as the `rows` query parameter).
ROWS=50000
# Total number of pages; starts at 0 and is overwritten by getNumPages.
PAGES=0
# Epoch seconds + nanoseconds truncated to the first 13 bytes, sent as the
# `nd` query parameter. Relies on `date` supporting %N.
ND=$(date +%s%N | cut -b1-13)
# User-Agent header sent with every request.
USER_AGENT='Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0'
# Full CSV, filtered CSV, output archive and cookie-jar file names
# (relative to the cwd).
F_CONCESSIONS='concessions.csv'
F_LEGAL='legal.csv'
F_TAR_GZ='concessions.tar.gz'
F_COOKIES='cookies.txt'
# External commands whose presence is checked before any download starts.
APP=(wget jq csvgrep tar gzip)
|
||||
|
||||
#######################################
# Visit the site's landing page and store the session cookies it sets.
# Globals:   F_COOKIES (read) - cookie-jar file to write
#            USER_AGENT (read)
# Outputs:   progress message on stdout; cookie jar written to F_COOKIES
# Returns:   wget's exit status
#######################################
function getCookies() {
  echo "Getting cookies..."

  # wget output is discarded on purpose; a failure still surfaces through
  # the exit status.
  wget -q \
    --ca-certificate=AC_Componentes_Informaticos_SHA256.crt \
    --keep-session-cookies \
    --save-cookies "$F_COOKIES" \
    --header "User-Agent: $USER_AGENT" \
    'https://www.infosubvenciones.es/' &> /dev/null
}
|
||||
|
||||
#######################################
# Download one page of "concs" search results into page_NNN.json.
# Globals:   COOKIE (read, optional) - sent as the Cookie header when set;
#              otherwise cookies are loaded from F_COOKIES
#            F_COOKIES, USER_AGENT, ND, ROWS (read)
# Arguments: $1 - page number to download
#            $2 - total number of pages (used only in the progress message)
# Outputs:   progress message on stdout; page_NNN.json in the cwd
# Returns:   wget's exit status
#######################################
function fetchConcessions() {
  local outputFile
  printf -v outputFile 'page_%03d.json' "$1"

  echo "Downloading page $1 of $2..."

  # Only the cookie source differs between the two modes, so build that part
  # separately and share the rest of the command line.
  local -a cookieArgs
  if [[ -z ${COOKIE+x} ]]; then
    cookieArgs=(--load-cookies "$F_COOKIES")
  else
    cookieArgs=(--header "Cookie: $COOKIE")
  fi

  wget -q \
    --ca-certificate=AC_Componentes_Informaticos_SHA256.crt \
    --keep-session-cookies \
    "${cookieArgs[@]}" \
    --header "User-Agent: $USER_AGENT" \
    -O "$outputFile" \
    "https://www.infosubvenciones.es/bdnstrans/busqueda?type=concs&_search=false&nd=$ND&rows=$ROWS&page=$1&sidx=8&sord=asc"
}
# Exported so the function is available inside child bash processes.
export -f fetchConcessions
|
||||
|
||||
#######################################
# Download page 1 and read the total page/record counts from it.
# Globals:   PAGES (written) - set to the `.total` field of page 1
# Outputs:   progress and totals on stdout; page_001.json in the cwd
# Returns:   calls errorHandler when the page count is missing, non-numeric
#            or not positive
#######################################
function getNumPages() {
  local page=1
  local outputFile
  local records
  printf -v outputFile 'page_%03d.json' "$page"

  echo "Getting number of pages..."

  fetchConcessions "$page" "$PAGES"

  # Assign separately from `local` so a jq failure is not masked.
  PAGES=$(jq -r '.total' "$outputFile")
  records=$(jq -r '.records' "$outputFile")

  echo "Total pages: $PAGES"
  echo "Total records: $records"

  # Reject anything that is not a positive decimal integer (e.g. `null`
  # when the response lacks the field). 10# avoids octal interpretation.
  if [[ ! $PAGES =~ ^[0-9]+$ ]] || (( 10#$PAGES <= 0 )); then
    errorHandler "Cannot get number of pages" "$LINENO"
  fi
}
|
||||
|
||||
#######################################
# Download pages 2..PAGES, up to five at a time.
# Globals:   PAGES (read) - total number of pages
#            ND, ROWS, USER_AGENT, F_COOKIES (exported for the workers)
# Outputs:   progress messages on stdout; one page_NNN.json per page
# Returns:   non-zero when any worker fails
#######################################
function getAllConcessions() {
  echo "Downloading all concessions..."

  # Downloading starts at page 2, so a single-page result leaves nothing to
  # do. The guard also avoids `seq 2 1` counting downwards on BSD systems.
  if [[ $PAGES -lt 2 ]]; then
    return 0
  fi

  # Each worker is a separate bash process: it inherits the exported
  # fetchConcessions function but only *exported* variables, so the settings
  # the function reads must be exported as well.
  export ND ROWS USER_AGENT F_COOKIES

  # Page number and total are passed as positional arguments instead of being
  # spliced into the command string.
  seq 2 "$PAGES" \
    | xargs -P 5 -I '{}' bash -c 'fetchConcessions "$1" "$2"' _ '{}' "$PAGES"
}
|
||||
|
||||
#######################################
# Flatten the `.rows` array of every *.json file in the cwd into CSV lines.
# Globals:   F_CONCESSIONS (read) - CSV file to (over)write
# Outputs:   progress message on stdout; CSV written to F_CONCESSIONS
# Returns:   jq's exit status
#######################################
function convertJson2Csv() {
  echo "Converting JSON to CSV..."

  # jq accepts several input files, so one invocation replaces the per-file
  # loop; the glob expands in the same sorted order.
  jq -r '.rows[] | [.[]] | @csv' ./*.json > "$F_CONCESSIONS"
}
|
||||
|
||||
#######################################
# Copy the rows whose 10th column matches the identifier pattern below into
# a separate CSV file.
# Globals:   F_CONCESSIONS (read) - headerless input CSV
#            F_LEGAL (read)       - filtered CSV to (over)write
# Outputs:   progress message on stdout; filtered CSV written to F_LEGAL
# Returns:   csvgrep's exit status
#######################################
function getAllLegal() {
  echo "Extracting legal concessions..."

  # The pattern keeps values that start with one letter from the set, seven
  # digits, one digit or A-J, and then a space.
  # NOTE(review): presumably a Spanish legal-entity tax ID followed by the
  # entity name; the letter set omits N and W - confirm that is intended.
  # NOTE(review): with --no-header-row csvgrep may emit generated column
  # names as the first output line - confirm consumers expect that.
  csvgrep --no-header-row \
    --columns 10 \
    --regex '^[A-HJP-SUV]\d{7}[0-9A-J] ' \
    "$F_CONCESSIONS" > "$F_LEGAL"
}
|
||||
|
||||
#######################################
# Pack both CSV files into a gzip-compressed tarball.
# Globals:   F_TAR_GZ (read)               - archive to create
#            F_CONCESSIONS, F_LEGAL (read) - files to archive
# Outputs:   progress message on stdout
# Returns:   tar's exit status
#######################################
function compressData() {
  echo "Compressing data..."

  tar -czf "$F_TAR_GZ" "$F_CONCESSIONS" "$F_LEGAL"
}
|
||||
|
||||
#######################################
# Remove the downloaded JSON pages and the cookie jar.
# Globals:   F_COOKIES (read) - cookie-jar file to delete
# Outputs:   progress message on stdout
# Note:      deletes every *.json file in the current directory.
#######################################
function cleanTempFiles() {
  echo "Cleaning temporary files..."

  # `--` stops option parsing so a name starting with '-' is not misread.
  rm -f -- ./*.json "$F_COOKIES"
}
|
||||
|
||||
#######################################
# Report an error, remove temporary files and terminate the script.
# Arguments: $1 - error description or exit status
#            $2 - line number where the error occurred
# Outputs:   error message on stderr
# Returns:   never returns; exits with status 1
#######################################
function errorHandler() {
  # Prevent the ERR trap from re-entering this handler if cleanup fails.
  trap - ERR

  echo "Error: ($1) occurred on line $2" >&2
  cleanTempFiles
  exit 1
}
|
||||
|
||||
# Any failing command reports its status and line number, then cleans up.
trap 'errorHandler $? $LINENO' ERR
# Ctrl+C: clean up and exit with an explicit 130 (128 + SIGINT) instead of
# an implicit status.
trap "echo -e '\nTerminated by Ctrl+c'; cleanTempFiles; exit 130" INT

# Refuse to start when a required command is not available on PATH.
for element in "${APP[@]}"; do
  if ! command -v "$element" > /dev/null 2>&1; then
    errorHandler "$element - Maybe it is not installed on the system. Sorry but I can't continue" "$LINENO"
  fi
done

# ${COOKIE+x} is empty only when COOKIE is unset; a COOKIE that is set but
# empty still counts as "set".
if [[ -z ${COOKIE+x} ]]; then
  echo "Manual cookie is unset"
  getCookies
else
  echo "Manual cookie is set. Skipping get cookie step"
fi

getNumPages
getAllConcessions
convertJson2Csv
getAllLegal
compressData
cleanTempFiles
|
Loading…
Reference in New Issue